diff options
Diffstat (limited to 'fs')
169 files changed, 20918 insertions, 13266 deletions
diff --git a/fs/Config.in b/fs/Config.in index 39161ad95..fe279dc94 100644 --- a/fs/Config.in +++ b/fs/Config.in @@ -6,9 +6,7 @@ comment 'Filesystems' bool 'Quota support' CONFIG_QUOTA tristate 'Minix fs support' CONFIG_MINIX_FS -tristate 'Extended fs support' CONFIG_EXT_FS tristate 'Second extended fs support' CONFIG_EXT2_FS -tristate 'xiafs filesystem support' CONFIG_XIA_FS # msdos filesystems tristate 'DOS FAT fs support' CONFIG_FAT_FS @@ -26,6 +24,19 @@ if [ "$CONFIG_INET" = "y" ]; then bool ' RARP support' CONFIG_RNFS_RARP fi fi + tristate 'NFS server support' CONFIG_NFSD + if [ "$CONFIG_NFS_FS" = "y" -o "$CONFIG_NFSD" = "y" ]; then + define_bool CONFIG_SUNRPC y + define_bool CONFIG_LOCKD y + else + if [ "$CONFIG_NFS_FS" = "m" -o "$CONFIG_NFSD" = "m" ]; then + define_bool CONFIG_SUNRPC m + define_bool CONFIG_LOCKD m + else + define_bool CONFIG_SUNRPC n + define_bool CONFIG_LOCKD n + fi + fi tristate 'SMB filesystem support (to mount WfW shares etc..)' CONFIG_SMB_FS if [ "$CONFIG_SMB_FS" != "n" ]; then bool 'SMB Win95 bug work-around' CONFIG_SMB_WIN95 @@ -38,6 +49,8 @@ tristate 'ISO9660 cdrom filesystem support' CONFIG_ISO9660_FS tristate 'OS/2 HPFS filesystem support (read only)' CONFIG_HPFS_FS tristate 'System V and Coherent filesystem support' CONFIG_SYSV_FS tristate 'Amiga FFS filesystem support' CONFIG_AFFS_FS +tristate 'ROM filesystem support' CONFIG_ROMFS_FS +tristate 'Kernel automounter support (experimental)' CONFIG_AUTOFS_FS if [ "$CONFIG_AFFS_FS" != "n" ]; then define_bool CONFIG_AMIGA_PARTITION y fi @@ -47,3 +60,4 @@ if [ "$CONFIG_UFS_FS" != "n" ]; then bool 'SMD disklabel (Sun partition tables) support' CONFIG_SMD_DISKLABEL fi endmenu + diff --git a/fs/Makefile b/fs/Makefile index 4359e3acc..471a9de5c 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -16,8 +16,8 @@ O_OBJS = open.o read_write.o inode.o devices.o file_table.o buffer.o \ dcache.o $(BINFMTS) MOD_LIST_NAME := FS_MODULES -ALL_SUB_DIRS = minix ext ext2 fat msdos vfat proc isofs nfs xiafs 
umsdos \ - hpfs sysv smbfs ncpfs ufs affs +ALL_SUB_DIRS = minix ext2 fat msdos vfat proc isofs nfs umsdos \ + hpfs sysv smbfs ncpfs ufs affs romfs autofs lockd nfsd ifeq ($(CONFIG_QUOTA),y) O_OBJS += dquot.o @@ -33,14 +33,6 @@ else endif endif -ifeq ($(CONFIG_EXT_FS),y) -SUB_DIRS += ext -else - ifeq ($(CONFIG_EXT_FS),m) - MOD_SUB_DIRS += ext - endif -endif - ifeq ($(CONFIG_EXT2_FS),y) SUB_DIRS += ext2 else @@ -75,6 +67,10 @@ endif ifdef CONFIG_PROC_FS SUB_DIRS += proc +ifeq ($(CONFIG_SUN_OPENPROMFS),m) +MOD_IN_SUB_DIRS += proc +MOD_TO_LIST += openpromfs.o +endif endif ifeq ($(CONFIG_ISO9660_FS),y) @@ -93,11 +89,20 @@ else endif endif -ifeq ($(CONFIG_XIA_FS),y) -SUB_DIRS += xiafs +ifeq ($(CONFIG_NFSD),y) +CONFIG_LOCKD := y +SUB_DIRS += nfsd else - ifeq ($(CONFIG_XIA_FS),m) - MOD_SUB_DIRS += xiafs + ifeq ($(CONFIG_NFSD),m) + MOD_SUB_DIRS += nfsd + endif +endif + +ifeq ($(CONFIG_LOCKD),y) +SUB_DIRS += lockd +else + ifeq ($(CONFIG_LOCKD),m) + MOD_SUB_DIRS := lockd $(MOD_SUB_DIRS) endif endif @@ -157,6 +162,22 @@ else endif endif +ifeq ($(CONFIG_ROMFS_FS),y) +SUB_DIRS += romfs +else + ifeq ($(CONFIG_ROMFS_FS),m) + MOD_SUB_DIRS += romfs + endif +endif + +ifeq ($(CONFIG_AUTOFS_FS),y) +SUB_DIRS += autofs +else + ifeq ($(CONFIG_AUTOFS_FS),m) + MOD_SUB_DIRS += autofs + endif +endif + ifeq ($(CONFIG_BINFMT_ELF),y) BINFMTS += binfmt_elf.o else @@ -181,6 +202,15 @@ else endif endif +ifeq ($(CONFIG_BINFMT_EM86),y) +BINFMTS += binfmt_em86.o +else + ifeq ($(CONFIG_BINFMT_EM86),m) + M_OBJS += binfmt_em86.o + endif +endif + + # binfmt_script is always there BINFMTS += binfmt_script.o diff --git a/fs/affs/dir.c b/fs/affs/dir.c index b82a17099..f7ec42ede 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -32,7 +32,7 @@ static struct file_operations affs_dir_operations = { affs_dir_read, /* read */ NULL, /* write - bad */ affs_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ 
diff --git a/fs/affs/file.c b/fs/affs/file.c index aa37f47a0..a450dffce 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -42,14 +42,14 @@ static long affs_file_write(struct inode *inode, struct file *filp, const char * static long affs_file_write_ofs(struct inode *inode, struct file *filp, const char *buf, unsigned long count); static int affs_open_file(struct inode *inode, struct file *filp); -static void affs_release_file(struct inode *inode, struct file *filp); +static int affs_release_file(struct inode *inode, struct file *filp); static struct file_operations affs_file_operations = { NULL, /* lseek - default */ generic_file_read, /* read */ affs_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ generic_file_mmap, /* mmap */ affs_open_file, /* special open is needed */ @@ -83,7 +83,7 @@ static struct file_operations affs_file_operations_ofs = { affs_file_read_ofs, /* read */ affs_file_write_ofs, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ affs_open_file, /* special open is needed */ @@ -883,7 +883,7 @@ affs_open_file(struct inode *inode, struct file *filp) return error; } -static void +static int affs_release_file(struct inode *inode, struct file *filp) { struct affs_zone *zone; @@ -913,4 +913,5 @@ affs_release_file(struct inode *inode, struct file *filp) } } unlock_super(inode->i_sb); + return 0; } diff --git a/fs/affs/inode.c b/fs/affs/inode.c index b44842705..9b6626f7d 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -4,7 +4,7 @@ * (c) 1996 Hans-Joachim Widmaier - Rewritten * * (C) 1993 Ray Burr - Modified for Amiga FFS filesystem. - * + * * (C) 1992 Eric Youngdale Modified for ISO9660 filesystem. 
* * (C) 1991 Linus Torvalds - minix filesystem @@ -26,6 +26,7 @@ #include <linux/amigaffs.h> #include <linux/major.h> #include <linux/blkdev.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -98,11 +99,11 @@ affs_write_super(struct super_block *sb) pr_debug("AFFS: write_super() at %d, clean=%d\n",CURRENT_TIME,clean); } -static struct super_operations affs_sops = { +static struct super_operations affs_sops = { affs_read_inode, affs_notify_change, affs_write_inode, - affs_put_inode, + affs_put_inode, affs_put_super, affs_write_super, affs_statfs, @@ -467,7 +468,7 @@ affs_read_super(struct super_block *s,void *data, int silent) size = s->u.affs_sb.s_partition_size - reserved; num_bm = (size + s->s_blocksize * 8 - 32 - 1) / (s->s_blocksize * 8 - 32); az_no = (size + AFFS_ZONE_SIZE - 1) / (AFFS_ZONE_SIZE - 32); - ptype = num_bm * sizeof(struct affs_bm_info) + + ptype = num_bm * sizeof(struct affs_bm_info) + az_no * sizeof(struct affs_alloc_zone) + MAX_ZONES * sizeof(struct affs_zone); pr_debug("num_bm=%d, az_no=%d, sum=%d\n",num_bm,az_no,ptype); @@ -666,7 +667,7 @@ affs_read_inode(struct inode *inode) inode->u.affs_i.i_protect = prot; inode->u.affs_i.i_parent = htonl(file_end->parent); - inode->u.affs_i.i_original = 0; + inode->u.affs_i.i_original = 0; inode->u.affs_i.i_zone = 0; inode->u.affs_i.i_hlink = 0; inode->u.affs_i.i_pa_cnt = 0; @@ -682,7 +683,7 @@ affs_read_inode(struct inode *inode) inode->i_mode = inode->i_sb->u.affs_sb.s_mode; else inode->i_mode = prot_to_mode(prot); - + if (inode->i_sb->u.affs_sb.s_flags & SF_SETUID) inode->i_uid = inode->i_sb->u.affs_sb.s_uid; else { @@ -761,7 +762,7 @@ affs_read_inode(struct inode *inode) sys_tz.tz_minuteswest * 60; affs_brelse(bh); affs_brelse(lbh); - + inode->i_op = NULL; if (S_ISREG(inode->i_mode)) { if (inode->i_sb->u.affs_sb.s_flags & SF_OFS) { @@ -829,16 +830,16 @@ affs_notify_change(struct inode *inode, struct iattr *attr) error = inode_change_ok(inode,attr); if (error) return error; 
- + if (((attr->ia_valid & ATTR_UID) && (inode->i_sb->u.affs_sb.s_flags & SF_SETUID)) || ((attr->ia_valid & ATTR_GID) && (inode->i_sb->u.affs_sb.s_flags & SF_SETGID)) || ((attr->ia_valid & ATTR_MODE) && (inode->i_sb->u.affs_sb.s_flags & (SF_SETMODE | SF_IMMUTABLE)))) error = -EPERM; - + if (error) return (inode->i_sb->u.affs_sb.s_flags & SF_QUIET) ? 0 : error; - + if (attr->ia_valid & ATTR_MODE) inode->u.affs_i.i_protect = mode_to_prot(attr->ia_mode); @@ -870,7 +871,7 @@ affs_new_inode(const struct inode *dir) if (!dir || !(inode = get_empty_inode())) return NULL; - + sb = dir->i_sb; inode->i_sb = sb; inode->i_flags = sb->s_flags; @@ -950,12 +951,12 @@ affs_add_entry(struct inode *dir, struct inode *link, struct inode *inode, hash = affs_hash_name(name,len,AFFS_I2FSTYPE(dir),AFFS_I2HSIZE(dir)); lock_super(inode->i_sb); - DIR_END(inode_bh->b_data,inode)->hash_chain = + DIR_END(inode_bh->b_data,inode)->hash_chain = ((struct dir_front *)dir_bh->b_data)->hashtable[hash]; ((struct dir_front *)dir_bh->b_data)->hashtable[hash] = ntohl(inode->i_ino); if (link_bh) { LINK_END(inode_bh->b_data,inode)->original = ntohl(link->i_ino); - LINK_END(inode_bh->b_data,inode)->link_chain = + LINK_END(inode_bh->b_data,inode)->link_chain = FILE_END(link_bh->b_data,link)->link_chain; FILE_END(link_bh->b_data,link)->link_chain = ntohl(inode->i_ino); affs_fix_checksum(AFFS_I2BSIZE(link),link_bh->b_data,5); @@ -987,21 +988,18 @@ static struct file_system_type affs_fs_type = { NULL }; -int -init_affs_fs(void) +__initfunc(int init_affs_fs(void)) { return register_filesystem(&affs_fs_type); } #ifdef MODULE +EXPORT_NO_SYMBOLS; int init_module(void) { - int status; - if ((status = init_affs_fs()) == 0) - register_symtab(0); - return status; + return init_affs_fs(); } void diff --git a/fs/xiafs/Makefile b/fs/autofs/Makefile index e596cc559..12f302635 100644 --- a/fs/xiafs/Makefile +++ b/fs/autofs/Makefile @@ -1,14 +1,16 @@ # -# Makefile for the XIAFS filesystem routines. 
+# Makefile for the linux autofs-filesystem routines. # # Note! Dependencies are done automagically by 'make dep', which also # removes any old dependencies. DON'T put your own dependencies here # unless it's something special (ie not a .c file). # # Note 2! The CFLAGS definitions are now in the main makefile... +# + +O_TARGET := autofs.o +O_OBJS := dir.o dirhash.o init.o inode.o root.o symlink.o waitq.o -O_TARGET := xiafs.o -O_OBJS := bitmap.o truncate.o namei.o inode.o file.o dir.o symlink.o fsync.o M_OBJS := $(O_TARGET) include $(TOPDIR)/Rules.make diff --git a/fs/autofs/dir.c b/fs/autofs/dir.c new file mode 100644 index 000000000..22081d1a7 --- /dev/null +++ b/fs/autofs/dir.c @@ -0,0 +1,90 @@ +/* -*- linux-c -*- --------------------------------------------------------- * + * + * linux/fs/autofs/dir.c + * + * Copyright 1997 Transmeta Corporation -- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * ------------------------------------------------------------------------- */ + +#include <linux/auto_fs.h> + +static int autofs_dir_readdir(struct inode *inode, struct file *filp, + void *dirent, filldir_t filldir) +{ + if (!inode || !S_ISDIR(inode->i_mode)) + return -ENOTDIR; + + switch((unsigned long) filp->f_pos) + { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, 1, AUTOFS_ROOT_INO) < 0) + return 0; + filp->f_pos++; + /* fall through */ + } + return 1; +} + +static int autofs_dir_lookup(struct inode *dir, const char *name, int len, + struct inode **result) +{ + *result = dir; + if (!len) + return 0; + if (name[0] == '.') { + if (len == 1) + return 0; + if (name[1] == '.' 
&& len == 2) { + /* Return the root directory */ + *result = iget(dir->i_sb,AUTOFS_ROOT_INO); + iput(dir); + return 0; + } + } + *result = NULL; + iput(dir); + return -ENOENT; /* No other entries */ +} + +static struct file_operations autofs_dir_operations = { + NULL, /* lseek */ + NULL, /* read */ + NULL, /* write */ + autofs_dir_readdir, /* readdir */ + NULL, /* select */ + NULL, /* ioctl */ + NULL, /* mmap */ + NULL, /* open */ + NULL, /* release */ + NULL /* fsync */ +}; + +struct inode_operations autofs_dir_inode_operations = { + &autofs_dir_operations, /* file operations */ + NULL, /* create */ + autofs_dir_lookup, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* read_page */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c new file mode 100644 index 000000000..8ea5325c4 --- /dev/null +++ b/fs/autofs/dirhash.c @@ -0,0 +1,129 @@ +/* -*- linux-c -*- --------------------------------------------------------- * + * + * linux/fs/autofs/dirhash.c + * + * Copyright 1997 Transmeta Corporation -- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. 
+ * + * ------------------------------------------------------------------------- */ + +#include <linux/string.h> +#include <linux/malloc.h> +#include <linux/auto_fs.h> + +/* Adapted from the Dragon Book, page 436 */ +/* This particular hashing algorithm requires autofs_hash_t == u32 */ +autofs_hash_t autofs_hash(const char *name, int len) +{ + autofs_hash_t h = 0; + while ( len-- ) { + h = (h << 4) + (unsigned char) (*name++); + h ^= ((h & 0xf0000000) >> 24); + } + return h; +} + +void autofs_initialize_hash(struct autofs_dirhash *dh) { + memset(&dh->h, 0, AUTOFS_HASH_SIZE*sizeof(struct autofs_dir_ent *)); +} + +struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *dh, autofs_hash_t hash, const char *name, int len) +{ + struct autofs_dir_ent *dhn; + + DPRINTK(("autofs_hash_lookup: hash = 0x%08x, name = ", hash)); + autofs_say(name,len); + + for ( dhn = dh->h[hash % AUTOFS_HASH_SIZE] ; dhn ; dhn = dhn->next ) { + if ( hash == dhn->hash && + len == dhn->len && + !memcmp(name, dhn->name, len) ) + break; + } + + return dhn; +} + +void autofs_hash_insert(struct autofs_dirhash *dh, struct autofs_dir_ent *ent) +{ + struct autofs_dir_ent **dhnp; + + DPRINTK(("autofs_hash_insert: hash = 0x%08x, name = ", ent->hash)); + autofs_say(ent->name,ent->len); + + dhnp = &dh->h[ent->hash % AUTOFS_HASH_SIZE]; + ent->next = *dhnp; + ent->back = dhnp; + *dhnp = ent; +} + +void autofs_hash_delete(struct autofs_dir_ent *ent) +{ + *(ent->back) = ent->next; + kfree(ent->name); + kfree(ent); +} + +/* + * Used by readdir(). We must validate "ptr", so we can't simply make it + * a pointer. Values below 0xffff are reserved; calling with any value + * <= 0x10000 will return the first entry found. 
+ */ +struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *dh, off_t *ptr) +{ + int bucket, ecount, i; + struct autofs_dir_ent *ent; + + bucket = (*ptr >> 16) - 1; + ecount = *ptr & 0xffff; + + if ( bucket < 0 ) { + bucket = ecount = 0; + } + + DPRINTK(("autofs_hash_enum: bucket %d, entry %d\n", bucket, ecount)); + + ent = NULL; + + while ( bucket < AUTOFS_HASH_SIZE ) { + ent = dh->h[bucket]; + for ( i = ecount ; ent && i ; i-- ) + ent = ent->next; + + if (ent) { + ecount++; /* Point to *next* entry */ + break; + } + + bucket++; ecount = 0; + } + +#ifdef DEBUG + if ( !ent ) + printk("autofs_hash_enum: nothing found\n"); + else { + printk("autofs_hash_enum: found hash %08x, name", ent->hash); + autofs_say(ent->name,ent->len); + } +#endif + + *ptr = ((bucket+1) << 16) + ecount; + return ent; +} + +void autofs_hash_nuke(struct autofs_dirhash *dh) +{ + int i; + struct autofs_dir_ent *ent, *nent; + + for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) { + for ( ent = dh->h[i] ; ent ; ent = nent ) { + nent = ent->next; + kfree(ent->name); + kfree(ent); + } + } +} diff --git a/fs/autofs/init.c b/fs/autofs/init.c new file mode 100644 index 000000000..a4857cb99 --- /dev/null +++ b/fs/autofs/init.c @@ -0,0 +1,49 @@ +/* -*- linux-c -*- --------------------------------------------------------- * + * + * linux/fs/autofs/init.c + * + * Copyright 1997 Transmeta Corporation -- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. 
+ * + * ------------------------------------------------------------------------- */ + +#include <linux/module.h> +#include <linux/auto_fs.h> + +struct file_system_type autofs_fs_type = { + autofs_read_super, "autofs", 0, NULL +}; + +int init_autofs_fs(void) +{ + return register_filesystem(&autofs_fs_type); +} + +#ifdef MODULE +int init_module(void) +{ + int status; + + if ((status = init_autofs_fs()) == 0) + register_symtab(0); + return status; +} + +void cleanup_module(void) +{ + unregister_filesystem(&autofs_fs_type); +} +#endif + +#ifdef DEBUG +void autofs_say(const char *name, int len) +{ + printk("(%d: ", len); + while ( len-- ) + printk("%c", *name++); + printk(")\n"); +} +#endif diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c new file mode 100644 index 000000000..60b805a07 --- /dev/null +++ b/fs/autofs/inode.c @@ -0,0 +1,273 @@ +/* -*- linux-c -*- --------------------------------------------------------- * + * + * linux/fs/autofs/inode.c + * + * Copyright 1997 Transmeta Corporation -- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. 
+ * + * ------------------------------------------------------------------------- */ + +#include <linux/kernel.h> +#include <linux/malloc.h> +#include <linux/file.h> +#include <linux/locks.h> +#include <asm/bitops.h> +#include <linux/auto_fs.h> +#define __NO_VERSION__ +#include <linux/module.h> + +static void autofs_put_inode(struct inode *inode) +{ + if (inode->i_nlink) + return; + inode->i_size = 0; +} + +static void autofs_put_super(struct super_block *sb) +{ + struct autofs_sb_info *sbi; + unsigned int n; + + lock_super(sb); + sbi = (struct autofs_sb_info *) sb->u.generic_sbp; + autofs_hash_nuke(&sbi->dirhash); + for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) { + if ( test_bit(n, sbi->symlink_bitmap) ) + kfree(sbi->symlink[n].data); + } + fput(sbi->pipe, sbi->pipe->f_inode); + + sb->s_dev = 0; + kfree(sb->u.generic_sbp); + unlock_super(sb); + + DPRINTK(("autofs: shutting down\n")); + +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif +} + +static void autofs_statfs(struct super_block *sb, struct statfs *buf, int bufsiz); +static void autofs_read_inode(struct inode *inode); +static void autofs_write_inode(struct inode *inode); + +static struct super_operations autofs_sops = { + autofs_read_inode, + NULL, + autofs_write_inode, + autofs_put_inode, + autofs_put_super, + NULL, + autofs_statfs, + NULL +}; + +static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, pid_t *pgrp, int *minproto, int *maxproto) +{ + char *this_char, *value; + + *uid = current->uid; + *gid = current->gid; + *pgrp = current->pgrp; + + *minproto = *maxproto = AUTOFS_PROTO_VERSION; + + *pipefd = -1; + + if ( !options ) return 1; + for (this_char = strtok(options,","); this_char; this_char = strtok(NULL,",")) { + if ((value = strchr(this_char,'=')) != NULL) + *value++ = 0; + if (!strcmp(this_char,"fd")) { + if (!value || !*value) + return 1; + *pipefd = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else if (!strcmp(this_char,"uid")) { + if (!value || !*value) + 
return 1; + *uid = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else if (!strcmp(this_char,"gid")) { + if (!value || !*value) + return 1; + *gid = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else if (!strcmp(this_char,"pgrp")) { + if (!value || !*value) + return 1; + *pgrp = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else if (!strcmp(this_char,"minproto")) { + if (!value || !*value) + return 1; + *minproto = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else if (!strcmp(this_char,"maxproto")) { + if (!value || !*value) + return 1; + *maxproto = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else break; + } + return (*pipefd < 0); +} + +struct super_block *autofs_read_super(struct super_block *s, void *data, + int silent) +{ + int pipefd; + struct autofs_sb_info *sbi; + int minproto, maxproto; + + MOD_INC_USE_COUNT; + + lock_super(s); + sbi = (struct autofs_sb_info *) kmalloc(sizeof(struct autofs_sb_info), GFP_KERNEL); + if ( !sbi ) { + s->s_dev = 0; + MOD_DEC_USE_COUNT; + return NULL; + } + DPRINTK(("autofs: starting up, sbi = %p\n",sbi)); + + s->u.generic_sbp = sbi; + sbi->catatonic = 0; + sbi->oz_pgrp = current->pgrp; + autofs_initialize_hash(&sbi->dirhash); + sbi->queues = NULL; + memset(sbi->symlink_bitmap, 0, sizeof(u32)*AUTOFS_SYMLINK_BITMAP_LEN); + sbi->next_dir_ino = AUTOFS_FIRST_DIR_INO; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = AUTOFS_SUPER_MAGIC; + s->s_op = &autofs_sops; + unlock_super(s); + if (!(s->s_mounted = iget(s, AUTOFS_ROOT_INO))) { + s->s_dev = 0; + kfree(sbi); + printk("autofs: get root inode failed\n"); + MOD_DEC_USE_COUNT; + return NULL; + } + + if ( parse_options(data,&pipefd,&s->s_mounted->i_uid,&s->s_mounted->i_gid,&sbi->oz_pgrp,&minproto,&maxproto) ) { + iput(s->s_mounted); + s->s_dev = 0; + kfree(sbi); + printk("autofs: called with bogus options\n"); + MOD_DEC_USE_COUNT; + return NULL; + } + + if ( minproto > 
AUTOFS_PROTO_VERSION || maxproto < AUTOFS_PROTO_VERSION ) { + iput(s->s_mounted); + s->s_dev = 0; + kfree(sbi); + printk("autofs: kernel does not match daemon version\n"); + MOD_DEC_USE_COUNT; + return NULL; + } + + DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, sbi->oz_pgrp)); + sbi->pipe = fget(pipefd); + if ( !sbi->pipe || !sbi->pipe->f_op || !sbi->pipe->f_op->write ) { + if ( sbi->pipe ) { + fput(sbi->pipe, sbi->pipe->f_inode); + printk("autofs: pipe file descriptor does not contain proper ops\n"); + } else { + printk("autofs: could not open pipe file descriptor\n"); + } + iput(s->s_mounted); + s->s_dev = 0; + kfree(sbi); + MOD_DEC_USE_COUNT; + return NULL; + } + return s; +} + +static void autofs_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) +{ + struct statfs tmp; + + tmp.f_type = AUTOFS_SUPER_MAGIC; + tmp.f_bsize = 1024; + tmp.f_blocks = 0; + tmp.f_bfree = 0; + tmp.f_bavail = 0; + tmp.f_files = 0; + tmp.f_ffree = 0; + tmp.f_namelen = NAME_MAX; + copy_to_user(buf, &tmp, bufsiz); +} + +static void autofs_read_inode(struct inode *inode) +{ + ino_t ino = inode->i_ino; + unsigned int n; + struct autofs_sb_info *sbi = + (struct autofs_sb_info *) inode->i_sb->u.generic_sbp; + + inode->i_op = NULL; + inode->i_mode = 0; + inode->i_nlink = 2; + inode->i_size = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_blocks = 0; + inode->i_blksize = 1024; + + if ( ino == AUTOFS_ROOT_INO ) { + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; + inode->i_op = &autofs_root_inode_operations; + inode->i_uid = inode->i_gid = 0; /* Changed in read_super */ + return; + } + + inode->i_uid = inode->i_sb->s_mounted->i_uid; + inode->i_gid = inode->i_sb->s_mounted->i_gid; + + if ( ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO ) { + /* Symlink inode - should be in symlink list */ + struct autofs_symlink *sl; + + n = ino - AUTOFS_FIRST_SYMLINK; + if ( n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) { + 
printk("autofs: Looking for bad symlink inode 0x%08x\n", (unsigned int) ino); + return; + } + + inode->i_op = &autofs_symlink_inode_operations; + sl = &sbi->symlink[n]; + inode->u.generic_ip = sl; + inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_mtime = inode->i_ctime = sl->mtime; + inode->i_size = sl->len; + inode->i_nlink = 1; + } else { + /* All non-root directory inodes look the same */ + inode->i_op = &autofs_dir_inode_operations; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + } +} + +static void autofs_write_inode(struct inode *inode) +{ + inode->i_dirt = 0; +} diff --git a/fs/autofs/root.c b/fs/autofs/root.c new file mode 100644 index 000000000..d9056dcb1 --- /dev/null +++ b/fs/autofs/root.c @@ -0,0 +1,362 @@ +/* -*- linux-c -*- --------------------------------------------------------- * + * + * linux/fs/autofs/root.c + * + * Copyright 1997 Transmeta Corporation -- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. 
+ * + * ------------------------------------------------------------------------- */ + +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/malloc.h> +#include <linux/ioctl.h> +#include <linux/auto_fs.h> + +static int autofs_root_readdir(struct inode *,struct file *,void *,filldir_t); +static int autofs_root_lookup(struct inode *,const char *,int,struct inode **); +static int autofs_root_symlink(struct inode *,const char *,int,const char *); +static int autofs_root_unlink(struct inode *,const char *,int); +static int autofs_root_rmdir(struct inode *,const char *,int); +static int autofs_root_mkdir(struct inode *,const char *,int,int); +static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); + +static struct file_operations autofs_root_operations = { + NULL, /* lseek */ + NULL, /* read */ + NULL, /* write */ + autofs_root_readdir, /* readdir */ + NULL, /* select */ + autofs_root_ioctl, /* ioctl */ + NULL, /* mmap */ + NULL, /* open */ + NULL, /* release */ + NULL /* fsync */ +}; + +struct inode_operations autofs_root_inode_operations = { + &autofs_root_operations, /* file operations */ + NULL, /* create */ + autofs_root_lookup, /* lookup */ + NULL, /* link */ + autofs_root_unlink, /* unlink */ + autofs_root_symlink, /* symlink */ + autofs_root_mkdir, /* mkdir */ + autofs_root_rmdir, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int autofs_root_readdir(struct inode *inode, struct file *filp, + void *dirent, filldir_t filldir) +{ + struct autofs_dir_ent *ent; + struct autofs_dirhash *dirhash; + off_t onr, nr; + + if (!inode || !S_ISDIR(inode->i_mode)) + return -ENOTDIR; + + dirhash = &((struct autofs_sb_info *)inode->i_sb->u.generic_sbp)->dirhash; + nr = filp->f_pos; + + switch(nr) + { + case 0: + if (filldir(dirent, ".", 1, nr, inode->i_ino) 
< 0) + return 0; + filp->f_pos = ++nr; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, nr, inode->i_ino) < 0) + return 0; + filp->f_pos = ++nr; + /* fall through */ + default: + while ( onr = nr, ent = autofs_hash_enum(dirhash,&nr) ) { + if (filldir(dirent,ent->name,ent->len,onr,ent->ino) < 0) + return 0; + filp->f_pos = nr; + } + break; + } + + return 0; +} + +static int autofs_root_lookup(struct inode *dir, const char *name, int len, + struct inode **result) +{ + struct autofs_sb_info *sbi; + struct autofs_dir_ent *ent; + struct inode *res; + autofs_hash_t hash; + int status, oz_mode; + + DPRINTK(("autofs_root_lookup: name = ")); + autofs_say(name,len); + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOTDIR; + } + + /* Handle special cases: . and ..; since this is a root directory, + they both point to the inode itself */ + *result = dir; + if (!len) + return 0; + if (name[0] == '.') { + if (len == 1) + return 0; + if (name[1] == '.' 
&& len == 2) + return 0; + } + + *result = res = NULL; + sbi = (struct autofs_sb_info *) dir->i_sb->u.generic_sbp; + + hash = autofs_hash(name,len); + + oz_mode = autofs_oz_mode(sbi); + DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", current->pid, current->pgrp, sbi->catatonic, oz_mode)); + + do { + while ( !(ent = autofs_hash_lookup(&sbi->dirhash,hash,name,len)) ) { + DPRINTK(("lookup failed, pid = %u, pgrp = %u\n", current->pid, current->pgrp)); + + if ( oz_mode ) { + iput(dir); + return -ENOENT; + } else { + status = autofs_wait(sbi,hash,name,len); + DPRINTK(("autofs_wait returned %d\n", status)); + if ( status ) { + iput(dir); + return status; + } + } + } + + DPRINTK(("lookup successful, inode = %08x\n", (unsigned int)ent->ino)); + + if (!(res = iget(dir->i_sb,ent->ino))) { + printk("autofs: iget returned null!\n"); + iput(dir); + return -EACCES; + } + + if ( !oz_mode && S_ISDIR(res->i_mode) && res->i_sb == dir->i_sb ) { + /* Not a mount point yet, call 1-800-DAEMON */ + DPRINTK(("autofs: waiting on non-mountpoint dir, inode = %lu, pid = %u, pgrp = %u\n", res->i_ino, current->pid, current->pgrp)); + iput(res); + res = NULL; + status = autofs_wait(sbi,hash,name,len); + if ( status ) { + iput(dir); + return status; + } + } + } while(!res); + + *result = res; + iput(dir); + return 0; +} + +static int autofs_root_symlink(struct inode *dir, const char *name, int len, const char *symname) +{ + struct autofs_sb_info *sbi = (struct autofs_sb_info *) dir->i_sb->u.generic_sbp; + struct autofs_dirhash *dh = &sbi->dirhash; + autofs_hash_t hash = autofs_hash(name,len); + struct autofs_dir_ent *ent; + unsigned int n; + int slsize; + struct autofs_symlink *sl; + + DPRINTK(("autofs_root_symlink: %s <- ", symname)); + autofs_say(name,len); + + iput(dir); + + if ( !autofs_oz_mode(sbi) ) + return -EPERM; + + if ( autofs_hash_lookup(dh,hash,name,len) ) + return -EEXIST; + + n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS); + if ( n >= 
AUTOFS_MAX_SYMLINKS ) + return -ENOSPC; + + set_bit(n,sbi->symlink_bitmap); + sl = &sbi->symlink[n]; + sl->len = strlen(symname); + sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL); + if ( !sl->data ) { + clear_bit(n,sbi->symlink_bitmap); + return -ENOSPC; + } + ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL); + if ( !ent ) { + kfree(sl->data); + clear_bit(n,sbi->symlink_bitmap); + return -ENOSPC; + } + ent->name = kmalloc(len, GFP_KERNEL); + if ( !ent->name ) { + kfree(sl->data); + kfree(ent); + clear_bit(n,sbi->symlink_bitmap); + return -ENOSPC; + } + memcpy(sl->data,symname,slsize); + sl->mtime = CURRENT_TIME; + + ent->ino = AUTOFS_FIRST_SYMLINK + n; + ent->hash = hash; + memcpy(ent->name,name,ent->len = len); + ent->expiry = END_OF_TIME; + + autofs_hash_insert(dh,ent); + + return 0; +} + +static int autofs_root_unlink(struct inode *dir, const char *name, int len) +{ + struct autofs_sb_info *sbi = (struct autofs_sb_info *) dir->i_sb->u.generic_sbp; + struct autofs_dirhash *dh = &sbi->dirhash; + autofs_hash_t hash = autofs_hash(name,len); + struct autofs_dir_ent *ent; + unsigned int n; + + if ( !autofs_oz_mode(sbi) ) + return -EPERM; + + ent = autofs_hash_lookup(dh,hash,name,len); + if ( !ent ) + return -ENOENT; + n = ent->ino - AUTOFS_FIRST_SYMLINK; + if ( n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap) ) + return -EINVAL; /* Not a symlink inode, can't unlink */ + autofs_hash_delete(ent); + clear_bit(n,sbi->symlink_bitmap); + kfree(sbi->symlink[n].data); + + return 0; +} + +static int autofs_root_rmdir(struct inode *dir, const char *name, int len) +{ + struct autofs_sb_info *sbi = (struct autofs_sb_info *) dir->i_sb->u.generic_sbp; + struct autofs_dirhash *dh = &sbi->dirhash; + autofs_hash_t hash = autofs_hash(name,len); + struct autofs_dir_ent *ent; + + if ( !autofs_oz_mode(sbi) ) { + iput(dir); + return -EPERM; + } + ent = autofs_hash_lookup(dh,hash,name,len); + if ( !ent ) { + iput(dir); + return -ENOENT; + } + if ( (unsigned 
int)ent->ino < AUTOFS_FIRST_DIR_INO ) { + iput(dir); + return -ENOTDIR; /* Not a directory */ + } + autofs_hash_delete(ent); + dir->i_nlink--; + iput(dir); + + return 0; +} + +static int autofs_root_mkdir(struct inode *dir, const char *name, int len, int mode) +{ + struct autofs_sb_info *sbi = (struct autofs_sb_info *) dir->i_sb->u.generic_sbp; + struct autofs_dirhash *dh = &sbi->dirhash; + autofs_hash_t hash = autofs_hash(name,len); + struct autofs_dir_ent *ent; + + if ( !autofs_oz_mode(sbi) ) { + iput(dir); + return -EPERM; + } + ent = autofs_hash_lookup(dh,hash,name,len); + if ( ent ) { + iput(dir); + return -EEXIST; + } + if ( sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO ) { + printk("autofs: Out of inode numbers -- what the heck did you do??\n"); + iput(dir); + return -ENOSPC; + } + ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL); + if ( !ent ) { + iput(dir); + return -ENOSPC; + } + ent->name = kmalloc(len, GFP_KERNEL); + if ( !ent->name ) { + kfree(ent); + iput(dir); + return -ENOSPC; + } + ent->hash = hash; + memcpy(ent->name, name, ent->len = len); + ent->ino = sbi->next_dir_ino++; + ent->expiry = END_OF_TIME; + autofs_hash_insert(dh,ent); + dir->i_nlink++; + iput(dir); + + return 0; +} + +/* + * ioctl()'s on the root directory is the chief method for the daemon to + * generate kernel reactions + */ +static int autofs_root_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct autofs_sb_info *sbi = (struct autofs_sb_info *)inode->i_sb->u.generic_sbp; + + DPRINTK(("autofs_ioctl: cmd = %04x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,current->pgrp)); + + switch(cmd) { + case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ + if ( !autofs_oz_mode(sbi) && !fsuser() ) + return -EPERM; + return autofs_wait_release(sbi,arg,0); + case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ + /* Optional: add to failure cache */ + if ( !autofs_oz_mode(sbi) && !fsuser() ) + return -EPERM; + return 
autofs_wait_release(sbi,arg,-ENOENT); + case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ + if ( !autofs_oz_mode(sbi) && !fsuser() ) + return -EPERM; + autofs_catatonic_mode(sbi); + return 0; + default: + return -ENOTTY; /* Should this be ENOSYS? */ + } +} diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c new file mode 100644 index 000000000..0e932c169 --- /dev/null +++ b/fs/autofs/symlink.c @@ -0,0 +1,85 @@ +/* -*- linux-c -*- --------------------------------------------------------- * + * + * linux/fs/autofs/symlink.c + * + * Copyright 1997 Transmeta Corporation -- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * ------------------------------------------------------------------------- */ + +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/auto_fs.h> + +static int autofs_follow_link(struct inode *dir, struct inode *inode, + int flag, int mode, struct inode **res_inode) +{ + int error; + char *link; + + *res_inode = NULL; + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + if (!inode) { + iput(dir); + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + iput(dir); + *res_inode = inode; + return 0; + } + if (current->link_count > 5) { + iput(dir); + iput(inode); + return -ELOOP; + } + link = ((struct autofs_symlink *)inode->u.generic_ip)->data; + current->link_count++; + error = open_namei(link,flag,mode,res_inode,dir); + current->link_count--; + iput(inode); + return error; +} + +static int autofs_readlink(struct inode *inode, char *buffer, int buflen) +{ + struct autofs_symlink *sl; + int len; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + sl = (struct autofs_symlink *)inode->u.generic_ip; + len = sl->len; + if (len > buflen) len = buflen; + copy_to_user(buffer,sl->data,len); + iput(inode); 
+ return len; +} + +struct inode_operations autofs_symlink_inode_operations = { + NULL, /* file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + autofs_readlink, /* readlink */ + autofs_follow_link, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c new file mode 100644 index 000000000..6dc6d0b13 --- /dev/null +++ b/fs/autofs/waitq.c @@ -0,0 +1,162 @@ +/* -*- linux-c -*- --------------------------------------------------------- * + * + * linux/fs/autofs/waitq.c + * + * Copyright 1997 Transmeta Corporation -- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. 
+ * + * ------------------------------------------------------------------------- */ + +#include <linux/malloc.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/auto_fs.h> + +/* We make this a static variable rather than a part of the superblock; it + is better if we don't reassign numbers easily even across filesystems */ +static int autofs_next_wait_queue = 1; + +void autofs_catatonic_mode(struct autofs_sb_info *sbi) +{ + struct autofs_wait_queue *wq, *nwq; + + DPRINTK(("autofs: entering catatonic mode\n")); + + sbi->catatonic = 1; + wq = sbi->queues; + sbi->queues = NULL; /* Erase all wait queues */ + while ( wq ) { + nwq = wq->next; + wq->status = -ENOENT; /* Magic is gone - report failure */ + kfree(wq->name); + wq->name = NULL; + wake_up(&wq->queue); + wq = nwq; + } +} + +static int autofs_write(struct file *file, const void *addr, int bytes) +{ + unsigned short fs; + unsigned long old_signal; + const char *data = (const char *)addr; + int written; + + /** WARNING: this is not safe for writing more than PIPE_BUF bytes! 
**/ + + /* Save pointer to user space and point back to kernel space */ + fs = get_fs(); + set_fs(KERNEL_DS); + + old_signal = current->signal; + + while ( bytes && (written = file->f_op->write(file->f_inode,file,data,bytes)) > 0 ) { + data += written; + bytes -= written; + } + + if ( written == -EPIPE && !(old_signal & (1 << (SIGPIPE-1))) ) { + /* Keep the currently executing process from receiving a + SIGPIPE unless it was already supposed to get one */ + current->signal &= ~(1 << (SIGPIPE-1)); + } + set_fs(fs); + + return (bytes > 0); +} + +static void autofs_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq) +{ + struct autofs_packet_missing pkt; + + DPRINTK(("autofs_wait: wait id = 0x%08lx, name = ", wq->wait_queue_token)); + autofs_say(wq->name,wq->len); + + pkt.hdr.proto_version = AUTOFS_PROTO_VERSION; + pkt.hdr.type = autofs_ptype_missing; + pkt.wait_queue_token = wq->wait_queue_token; + pkt.len = wq->len; + memcpy(pkt.name, wq->name, pkt.len); + pkt.name[pkt.len] = '\0'; + + if ( autofs_write(sbi->pipe,&pkt,sizeof(struct autofs_packet_missing)) ) + autofs_catatonic_mode(sbi); +} + +int autofs_wait(struct autofs_sb_info *sbi, autofs_hash_t hash, const char *name, int len) +{ + struct autofs_wait_queue *wq; + int status; + + for ( wq = sbi->queues ; wq ; wq = wq->next ) { + if ( wq->hash == hash && + wq->len == len && + !memcmp(wq->name,name,len) ) + break; + } + + if ( !wq ) { + /* Create a new wait queue */ + wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); + if ( !wq ) + return -ENOMEM; + + wq->name = kmalloc(len,GFP_KERNEL); + if ( !wq->name ) { + kfree(wq); + return -ENOMEM; + } + wq->wait_queue_token = autofs_next_wait_queue++; + init_waitqueue(&wq->queue); + wq->hash = hash; + wq->len = len; + memcpy(wq->name, name, len); + wq->next = sbi->queues; + sbi->queues = wq; + + /* autofs_notify_daemon() may block */ + wq->wait_ctr++; + autofs_notify_daemon(sbi,wq); + } else + wq->wait_ctr++; + + if ( wq->name ) { + /* wq->name 
is NULL if and only if the lock is released */ + interruptible_sleep_on(&wq->queue); + } else { + DPRINTK(("autofs_wait: skipped sleeping\n")); + } + + status = (current->signal & ~current->blocked) ? -EINTR : wq->status; + if ( ! --wq->wait_ctr ) /* Are we the last process to need status? */ + kfree(wq); + + return status; +} + + +int autofs_wait_release(struct autofs_sb_info *sbi, unsigned long wait_queue_token, int status) +{ + struct autofs_wait_queue *wq, **wql; + + for ( wql = &sbi->queues ; (wq = *wql) ; wql = &wq->next ) { + if ( wq->wait_queue_token == wait_queue_token ) + break; + } + if ( !wq ) + return -EINVAL; + + *wql = wq->next; /* Unlink from chain */ + kfree(wq->name); + wq->name = NULL; /* Do not wait on this queue */ + + wq->status = status; + + wake_up(&wq->queue); + + return 0; +} + diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index cb535656a..d9ef6d6ac 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -22,6 +22,7 @@ #include <linux/malloc.h> #include <linux/binfmts.h> #include <linux/personality.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -37,7 +38,7 @@ static struct linux_binfmt aout_format = { #ifndef MODULE NULL, NULL, load_aout_binary, load_aout_library, aout_core_dump #else - NULL, &mod_use_count_, load_aout_binary, load_aout_library, aout_core_dump + NULL, &__this_module, load_aout_binary, load_aout_library, aout_core_dump #endif }; @@ -63,7 +64,7 @@ while (file.f_op->write(inode,&file,(char *)(addr),(nr)) != (nr)) goto close_cor if (file.f_op->llseek) { \ if (file.f_op->llseek(inode,&file,(offset),0) != (offset)) \ goto close_coredump; \ -} else file.f_pos = (offset) +} else file.f_pos = (offset) /* * Routine writes a core dump image in the current directory. 
@@ -85,11 +86,18 @@ do_aout_core_dump(long signr, struct pt_regs * regs) char corefile[6+sizeof(current->comm)]; unsigned long dump_start, dump_size; struct user dump; -#ifdef __alpha__ +#if defined(__alpha__) # define START_DATA(u) (u.start_data) -#else +#elif defined(__sparc__) +# define START_DATA(u) (u.u_tsize) +#elif defined(__i386__) || defined(__mc68000__) # define START_DATA(u) (u.u_tsize << PAGE_SHIFT) #endif +#ifdef __sparc__ +# define START_STACK(u) ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1)) +#else +# define START_STACK(u) (u.start_stack) +#endif if (!current->dumpable || current->mm->count != 1) return 0; @@ -131,45 +139,76 @@ do_aout_core_dump(long signr, struct pt_regs * regs) has_dumped = 1; current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(current->comm)); +#ifndef __sparc__ dump.u_ar0 = (void *)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); +#endif dump.signal = signr; dump_thread(regs, &dump); /* If the size of the dump file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ +#ifdef __sparc__ + if ((dump.u_dsize+dump.u_ssize) > + current->rlim[RLIMIT_CORE].rlim_cur) + dump.u_dsize = 0; +#else if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE > current->rlim[RLIMIT_CORE].rlim_cur) dump.u_dsize = 0; +#endif /* Make sure we have enough room to write the stack and data areas. 
*/ +#ifdef __sparc__ + if ((dump.u_ssize) > + current->rlim[RLIMIT_CORE].rlim_cur) + dump.u_ssize = 0; +#else if ((dump.u_ssize+1) * PAGE_SIZE > current->rlim[RLIMIT_CORE].rlim_cur) dump.u_ssize = 0; +#endif /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); +#ifdef __sparc__ + if (verify_area(VERIFY_READ, (void *) START_DATA(dump), dump.u_dsize)) + dump.u_dsize = 0; + if (verify_area(VERIFY_READ, (void *) START_STACK(dump), dump.u_ssize)) + dump.u_ssize = 0; +#else if (verify_area(VERIFY_READ, (void *) START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) dump.u_dsize = 0; - if (verify_area(VERIFY_READ, (void *) dump.start_stack, dump.u_ssize << PAGE_SHIFT)) + if (verify_area(VERIFY_READ, (void *) START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; +#endif set_fs(KERNEL_DS); /* struct user */ DUMP_WRITE(&dump,sizeof(dump)); /* Now dump all of the user data. Include malloced stuff as well */ +#ifndef __sparc__ DUMP_SEEK(PAGE_SIZE); +#endif /* now we start writing out the user space info */ set_fs(USER_DS); /* Dump the data area */ if (dump.u_dsize != 0) { dump_start = START_DATA(dump); +#ifdef __sparc__ + dump_size = dump.u_dsize; +#else dump_size = dump.u_dsize << PAGE_SHIFT; +#endif DUMP_WRITE(dump_start,dump_size); } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { - dump_start = dump.start_stack; + dump_start = START_STACK(dump); +#ifdef __sparc__ + dump_size = dump.u_ssize; +#else dump_size = dump.u_ssize << PAGE_SHIFT; +#endif DUMP_WRITE(dump_start,dump_size); } /* Finally dump the task struct. Not be used by gdb, but could be useful */ @@ -210,6 +249,11 @@ static unsigned long * create_aout_tables(char * p, struct linux_binprm * bprm) int envc = bprm->envc; sp = (unsigned long *) ((-(unsigned long)sizeof(char *)) & (unsigned long) p); +#ifdef __sparc__ + /* This imposes the proper stack alignment for a new process. 
*/ + sp = (unsigned long *) (((unsigned long) sp) & ~7); + if ((envc+argc+3)&1) --sp; +#endif #ifdef __alpha__ /* whee.. test-programs are so much fun. */ put_user(0, --sp); @@ -259,8 +303,7 @@ static unsigned long * create_aout_tables(char * p, struct linux_binprm * bprm) * libraries. There is no binary dependent code anywhere else. */ -static inline int -do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) +static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) { struct exec ex; struct file * file; @@ -271,8 +314,8 @@ do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) unsigned long rlim; ex = *((struct exec *) bprm->buf); /* exec-header */ - if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && - N_MAGIC(ex) != QMAGIC) || + if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && + N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || bprm->inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; @@ -306,6 +349,9 @@ do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ flush_old_exec(bprm); +#ifdef __sparc__ + memcpy(&current->tss.core_exec, &ex, sizeof(struct exec)); +#endif current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); @@ -319,8 +365,25 @@ do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->suid = current->euid = current->fsuid = bprm->e_uid; current->sgid = current->egid = current->fsgid = bprm->e_gid; current->flags &= ~PF_FORKNOEXEC; +#ifdef __sparc__ + if (N_MAGIC(ex) == NMAGIC) { + /* Fuck me plenty...
*/ + error = do_mmap(NULL, N_TXTADDR(ex), ex.a_text, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + read_exec(bprm->inode, fd_offset, (char *) N_TXTADDR(ex), + ex.a_text, 0); + error = do_mmap(NULL, N_DATADDR(ex), ex.a_data, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + read_exec(bprm->inode, fd_offset + ex.a_text, (char *) N_DATADDR(ex), + ex.a_data, 0); + goto beyond_if; + } +#endif + if (N_MAGIC(ex) == OMAGIC) { -#ifdef __alpha__ +#if defined(__alpha__) || defined(__sparc__) do_mmap(NULL, N_TXTADDR(ex) & PAGE_MASK, ex.a_text+ex.a_data + PAGE_SIZE - 1, PROT_READ|PROT_WRITE|PROT_EXEC, @@ -334,11 +397,12 @@ do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) read_exec(bprm->inode, 32, (char *) 0, ex.a_text+ex.a_data, 0); #endif } else { - if (ex.a_text & 0xfff || ex.a_data & 0xfff) + if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && + (N_MAGIC(ex) != NMAGIC)) printk(KERN_NOTICE "executable not page aligned\n"); - + fd = open_inode(bprm->inode, O_RDONLY); - + if (fd < 0) return fd; file = current->files->fd[fd]; @@ -362,7 +426,7 @@ do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) send_sig(SIGKILL, current, 0); return error; } - + error = do_mmap(file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, @@ -374,21 +438,21 @@ do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) } } beyond_if: - if (current->exec_domain && current->exec_domain->use_count) - (*current->exec_domain->use_count)--; - if (current->binfmt && current->binfmt->use_count) - (*current->binfmt->use_count)--; + if (current->exec_domain && current->exec_domain->module) + __MOD_DEC_USE_COUNT(current->exec_domain->module); + if (current->binfmt && current->binfmt->module) + __MOD_DEC_USE_COUNT(current->binfmt->module); current->exec_domain = lookup_exec_domain(current->personality); current->binfmt = &aout_format; - if (current->exec_domain 
&& current->exec_domain->use_count) - (*current->exec_domain->use_count)++; - if (current->binfmt && current->binfmt->use_count) - (*current->binfmt->use_count)++; + if (current->exec_domain && current->exec_domain->module) + __MOD_INC_USE_COUNT(current->exec_domain->module); + if (current->binfmt && current->binfmt->module) + __MOD_INC_USE_COUNT(current->binfmt->module); set_brk(current->mm->start_brk, current->mm->brk); p = setup_arg_pages(p, bprm); - + p = (unsigned long) create_aout_tables((char *)p, bprm); current->mm->start_stack = p; #ifdef __alpha__ @@ -400,6 +464,7 @@ beyond_if: return 0; } + static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) { @@ -421,10 +486,10 @@ do_load_aout_library(int fd) unsigned int bss; unsigned int start_addr; unsigned long error; - + file = current->files->fd[fd]; inode = file->f_inode; - + if (!file || !file->f_op) return -EACCES; @@ -447,12 +512,12 @@ do_load_aout_library(int fd) inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } - if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && + if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) { printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); return -ENOEXEC; } - + if (N_FLAGS(ex)) return -ENOEXEC; /* For QMAGIC, the starting address is 0x20 into the page. 
We mask @@ -491,7 +556,8 @@ load_aout_library(int fd) } -int init_aout_binfmt(void) { +__initfunc(int init_aout_binfmt(void)) +{ return register_binfmt(&aout_format); } @@ -504,4 +570,3 @@ void cleanup_module( void) { unregister_binfmt(&aout_format); } #endif - diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9af62dafc..587c44f8f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -27,6 +27,7 @@ #include <linux/shm.h> #include <linux/personality.h> #include <linux/elfcore.h> +#include <linux/init.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -44,6 +45,10 @@ static int load_elf_library(int fd); extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); extern void dump_thread(struct pt_regs *, struct user *); +#ifdef __sparc__ +extern unsigned long get_unmapped_area(unsigned long addr, unsigned long len); +#endif + /* * If we don't support core dumping, then supply a NULL so we * don't even try. @@ -61,7 +66,7 @@ static struct linux_binfmt elf_format = { #ifndef MODULE NULL, NULL, load_elf_binary, load_elf_library, elf_core_dump #else - NULL, &mod_use_count_, load_elf_binary, load_elf_library, elf_core_dump + NULL, &__this_module, load_elf_binary, load_elf_library, elf_core_dump #endif }; @@ -90,7 +95,7 @@ static void set_brk(unsigned long start, unsigned long end) { start = PAGE_ALIGN(start); end = PAGE_ALIGN(end); - if (end <= start) + if (end <= start) return; do_mmap(NULL, start, end - start, PROT_READ | PROT_WRITE | PROT_EXEC, @@ -107,7 +112,7 @@ static void set_brk(unsigned long start, unsigned long end) static void padzero(unsigned long elf_bss) { unsigned long nbyte; - + nbyte = elf_bss & (PAGE_SIZE-1); if (nbyte) { nbyte = PAGE_SIZE - nbyte; @@ -115,29 +120,28 @@ static void padzero(unsigned long elf_bss) } } -unsigned long * create_elf_tables(char *p, int argc, int envc, +unsigned long * create_elf_tables(char *p, int argc, int envc, struct elfhdr * exec, unsigned long load_addr, unsigned long interp_load_addr, int ibcs) { char **argv, 
**envp; unsigned long *sp; -#ifdef __mips__ - unsigned long * csp; -#endif /* * Force 16 byte alignment here for generality. */ sp = (unsigned long *) (~15UL & (unsigned long) p); -#ifdef __mips__ - /* Make sure we will be aligned properly at the end of this. */ +#if defined(__mips__) || defined(__sparc__) +{ + unsigned long *csp; csp = sp; csp -= exec ? DLINFO_ITEMS*2 : 2; - csp -= envc + 1; + csp -= envc+1; csp -= argc+1; if (!(((unsigned long) csp) & 4)) - sp--; + sp--; +} #endif /* @@ -200,7 +204,7 @@ unsigned long * create_elf_tables(char *p, int argc, int envc, an ELF header */ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, - struct inode * interpreter_inode, + struct inode * interpreter_inode, unsigned long *interp_load_addr) { struct file * file; @@ -213,18 +217,14 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, unsigned long last_bss, elf_bss; unsigned long error; int i; - + elf_bss = 0; last_bss = 0; error = load_addr = 0; - -#ifdef DEBUG_ELF - printk("[load_elf_interp] "); -#endif /* First of all, some simple consistency checks */ - if ((interp_elf_ex->e_type != ET_EXEC && - interp_elf_ex->e_type != ET_DYN) || + if ((interp_elf_ex->e_type != ET_EXEC && + interp_elf_ex->e_type != ET_DYN) || !elf_check_arch(interp_elf_ex->e_machine) || (!interpreter_inode->i_op || !interpreter_inode->i_op->default_file_ops->mmap)){ @@ -233,18 +233,18 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, #endif return ~0UL; } - + /* Now read in all of the header information */ - + if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > PAGE_SIZE) return ~0UL; - + elf_phdata = (struct elf_phdr *) kmalloc(sizeof(struct elf_phdr) * interp_elf_ex->e_phnum, GFP_KERNEL); if (!elf_phdata) return ~0UL; - + /* * If the size of this structure has changed, then punt, since * we will be doing the wrong thing. 
@@ -255,10 +255,10 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, return ~0UL; } - retval = read_exec(interpreter_inode, interp_elf_ex->e_phoff, + retval = read_exec(interpreter_inode, interp_elf_ex->e_phoff, (char *) elf_phdata, sizeof(struct elf_phdr) * interp_elf_ex->e_phnum, 1); - + if (retval < 0) { kfree (elf_phdata); return retval; @@ -292,10 +292,15 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, { elf_type |= MAP_FIXED; vaddr = eppnt->p_vaddr; +#ifdef __sparc__ + } else { + load_addr = get_unmapped_area(0, eppnt->p_filesz + + ELF_PAGEOFFSET(eppnt->p_vaddr)); +#endif } error = do_mmap(file, - load_addr + ELF_PAGESTART(vaddr), + ELF_PAGESTART(vaddr) + load_addr, eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), elf_prot, elf_type, @@ -325,7 +330,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; if (k > last_bss) last_bss = k; } - + /* Now use mmap to map the library into memory. 
*/ sys_close(elf_exec_fileno); @@ -355,18 +360,18 @@ static unsigned long load_aout_interp(struct exec * interp_ex, { int retval; unsigned long elf_entry; - + current->mm->brk = interp_ex->a_bss + (current->mm->end_data = interp_ex->a_data + (current->mm->end_code = interp_ex->a_text)); elf_entry = interp_ex->a_entry; - - + + if (N_MAGIC(*interp_ex) == OMAGIC) { do_mmap(NULL, 0, interp_ex->a_text+interp_ex->a_data, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, 0); - retval = read_exec(interpreter_inode, 32, (char *) 0, + retval = read_exec(interpreter_inode, 32, (char *) 0, interp_ex->a_text+interp_ex->a_data, 0); } else if (N_MAGIC(*interp_ex) == ZMAGIC || N_MAGIC(*interp_ex) == QMAGIC) { do_mmap(NULL, 0, interp_ex->a_text+interp_ex->a_data, @@ -378,7 +383,7 @@ static unsigned long load_aout_interp(struct exec * interp_ex, interp_ex->a_text+interp_ex->a_data, 0); } else retval = -1; - + if (retval >= 0) do_mmap(NULL, ELF_PAGESTART(interp_ex->a_text + interp_ex->a_data + ELF_EXEC_PAGESIZE - 1), interp_ex->a_bss, @@ -423,27 +428,27 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) unsigned long start_code, end_code, end_data; unsigned long elf_stack; char passed_fileno[6]; - + ibcs2_interpreter = 0; status = 0; load_addr = 0; elf_ex = *((struct elfhdr *) bprm->buf); /* exec-header */ - + if (elf_ex.e_ident[0] != 0x7f || strncmp(&elf_ex.e_ident[1], "ELF",3) != 0) { return -ENOEXEC; } - - + + /* First of all, some simple consistency checks */ if ((elf_ex.e_type != ET_EXEC && - elf_ex.e_type != ET_DYN) || + elf_ex.e_type != ET_DYN) || (! elf_check_arch(elf_ex.e_machine)) || (!bprm->inode->i_op || !bprm->inode->i_op->default_file_ops || !bprm->inode->i_op->default_file_ops->mmap)){ return -ENOEXEC; } - + #ifdef __mips__ /* IRIX binaries handled elsewhere. 
*/ if(elf_ex.e_flags & EF_MIPS_ARCH) @@ -451,39 +456,40 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) #endif /* Now read in all of the header information */ + elf_phdata = (struct elf_phdr *) kmalloc(elf_ex.e_phentsize * elf_ex.e_phnum, GFP_KERNEL); if (elf_phdata == NULL) { return -ENOMEM; } - + retval = read_exec(bprm->inode, elf_ex.e_phoff, (char *) elf_phdata, elf_ex.e_phentsize * elf_ex.e_phnum, 1); if (retval < 0) { kfree (elf_phdata); return retval; } - + elf_ppnt = elf_phdata; - + elf_bss = 0; elf_brk = 0; - + elf_exec_fileno = open_inode(bprm->inode, O_RDONLY); if (elf_exec_fileno < 0) { kfree (elf_phdata); return elf_exec_fileno; } - + file = current->files->fd[elf_exec_fileno]; - + elf_stack = ~0UL; elf_interpreter = NULL; start_code = ~0UL; end_code = 0; end_data = 0; - + for(i=0;i < elf_ex.e_phnum; i++){ if (elf_ppnt->p_type == PT_INTERP) { if ( elf_interpreter != NULL ) @@ -496,17 +502,17 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* This is the program interpreter used for * shared libraries - for now assume that this - * is an a.out format binary + * is an a.out format binary */ - - elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, + + elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, GFP_KERNEL); if (elf_interpreter == NULL) { kfree (elf_phdata); sys_close(elf_exec_fileno); return -ENOMEM; } - + retval = read_exec(bprm->inode,elf_ppnt->p_offset, elf_interpreter, elf_ppnt->p_filesz, 1); @@ -529,11 +535,11 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (retval >= 0) retval = read_exec(interpreter_inode,0,bprm->buf,128, 1); - + if (retval >= 0) { interp_ex = *((struct exec *) bprm->buf); /* exec-header */ interp_elf_ex = *((struct elfhdr *) bprm->buf); /* exec-header */ - + } if (retval < 0) { kfree (elf_phdata); @@ -550,9 +556,9 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; 
/* Now figure out which format our binary is */ - if ((N_MAGIC(interp_ex) != OMAGIC) && + if ((N_MAGIC(interp_ex) != OMAGIC) && (N_MAGIC(interp_ex) != ZMAGIC) && - (N_MAGIC(interp_ex) != QMAGIC)) + (N_MAGIC(interp_ex) != QMAGIC)) interpreter_type = INTERPRETER_ELF; if (interp_elf_ex.e_ident[0] != 0x7f || @@ -567,18 +573,18 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) return -ELIBBAD; } } - + /* OK, we are done with that, now set up the arg stuff, and then start this sucker up */ - + if (!bprm->sh_bang) { char * passed_p; - + if (interpreter_type == INTERPRETER_AOUT) { sprintf(passed_fileno, "%d", elf_exec_fileno); passed_p = passed_fileno; - - if(elf_interpreter) { + + if (elf_interpreter) { bprm->p = copy_strings(1,&passed_p,bprm->page,bprm->p,2); bprm->argc++; } @@ -592,7 +598,7 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) return -E2BIG; } } - + /* OK, This is the point of no return */ flush_old_exec(bprm); @@ -601,18 +607,18 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->start_mmap = ELF_START_MMAP; current->mm->mmap = NULL; elf_entry = (unsigned long) elf_ex.e_entry; - + /* Do this so that we can load the interpreter, if need be. We will change some of these later */ current->mm->rss = 0; bprm->p = setup_arg_pages(bprm->p, bprm); current->mm->start_stack = bprm->p; - + /* Now we do a little grungy work by mmaping the ELF image into the correct location in memory. At this point, we assume that the image should be loaded at fixed address, not at a variable address. 
*/ - + old_fs = get_fs(); set_fs(get_ds()); for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { @@ -631,13 +637,13 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) MAP_DENYWRITE | MAP_EXECUTABLE), (elf_ppnt->p_offset - ELF_PAGEOFFSET(elf_ppnt->p_vaddr))); - + #ifdef LOW_ELF_STACK - if (ELF_PAGESTART(elf_ppnt->p_vaddr) < elf_stack) + if (ELF_PAGESTART(elf_ppnt->p_vaddr) < elf_stack) elf_stack = ELF_PAGESTART(elf_ppnt->p_vaddr); #endif - - if (!load_addr_set) { + + if (!load_addr_set) { load_addr = elf_ppnt->p_vaddr - elf_ppnt->p_offset; load_addr_set = 1; } @@ -650,27 +656,27 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else if ( !(elf_ppnt->p_flags & PF_W) && end_code < k) #endif - end_code = k; - if (end_data < k) end_data = k; + end_code = k; + if (end_data < k) end_data = k; k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; - if (k > elf_brk) elf_brk = k; + if (k > elf_brk) elf_brk = k; } } set_fs(old_fs); if (elf_interpreter) { - if (interpreter_type & 1) + if (interpreter_type & 1) elf_entry = load_aout_interp(&interp_ex, interpreter_inode); - else if (interpreter_type & 2) - elf_entry = load_elf_interp(&interp_elf_ex, - interpreter_inode, + else if (interpreter_type & 2) + elf_entry = load_elf_interp(&interp_elf_ex, + interpreter_inode, &interp_load_addr); iput(interpreter_inode); kfree(elf_interpreter); - - if (elf_entry == ~0UL) { + + if (elf_entry == ~0UL) { printk("Unable to load interpreter\n"); kfree(elf_phdata); send_sig(SIGSEGV, current, 0); @@ -679,20 +685,20 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) } kfree(elf_phdata); - + if (interpreter_type != INTERPRETER_AOUT) sys_close(elf_exec_fileno); current->personality = (ibcs2_interpreter ? 
PER_SVR4 : PER_LINUX); - if (current->exec_domain && current->exec_domain->use_count) - (*current->exec_domain->use_count)--; - if (current->binfmt && current->binfmt->use_count) - (*current->binfmt->use_count)--; + if (current->exec_domain && current->exec_domain->module) + __MOD_DEC_USE_COUNT(current->exec_domain->module); + if (current->binfmt && current->binfmt->module) + __MOD_DEC_USE_COUNT(current->binfmt->module); current->exec_domain = lookup_exec_domain(current->personality); current->binfmt = &elf_format; - if (current->exec_domain && current->exec_domain->use_count) - (*current->exec_domain->use_count)++; - if (current->binfmt && current->binfmt->use_count) - (*current->binfmt->use_count)++; + if (current->exec_domain && current->exec_domain->module) + __MOD_INC_USE_COUNT(current->exec_domain->module); + if (current->binfmt && current->binfmt->module) + __MOD_INC_USE_COUNT(current->binfmt->module); #ifndef VM_STACK_FLAGS current->executable = bprm->inode; @@ -704,7 +710,7 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->suid = current->euid = current->fsuid = bprm->e_uid; current->sgid = current->egid = current->fsgid = bprm->e_gid; current->flags &= ~PF_FORKNOEXEC; - bprm->p = (unsigned long) + bprm->p = (unsigned long) create_elf_tables((char *)bprm->p, bprm->argc, bprm->envc, @@ -793,7 +799,7 @@ do_load_elf_library(int fd){ file = current->files->fd[fd]; inode = file->f_inode; elf_bss = 0; - + if (!file || !file->f_op) return -EACCES; @@ -819,31 +825,31 @@ do_load_elf_library(int fd){ !elf_check_arch(elf_ex.e_machine) || (!inode->i_op || !inode->i_op->default_file_ops->mmap)) return -ENOEXEC; - + /* Now read in all of the header information */ - + if (sizeof(struct elf_phdr) * elf_ex.e_phnum > PAGE_SIZE) return -ENOEXEC; - - elf_phdata = (struct elf_phdr *) + + elf_phdata = (struct elf_phdr *) kmalloc(sizeof(struct elf_phdr) * elf_ex.e_phnum, GFP_KERNEL); if (elf_phdata == NULL) return -ENOMEM; - + retval = 
read_exec(inode, elf_ex.e_phoff, (char *) elf_phdata, sizeof(struct elf_phdr) * elf_ex.e_phnum, 1); - + j = 0; for(i=0; i<elf_ex.e_phnum; i++) if ((elf_phdata + i)->p_type == PT_LOAD) j++; - + if (j != 1) { kfree(elf_phdata); return -ENOEXEC; } - + while(elf_phdata->p_type != PT_LOAD) elf_phdata++; - + /* Now use mmap to map the library into memory. */ error = do_mmap(file, ELF_PAGESTART(elf_phdata->p_vaddr), @@ -856,7 +862,7 @@ do_load_elf_library(int fd){ k = elf_phdata->p_vaddr + elf_phdata->p_filesz; if (k > elf_bss) elf_bss = k; - + if (error != ELF_PAGESTART(elf_phdata->p_vaddr)) { kfree(elf_phdata); return error; @@ -949,11 +955,11 @@ struct memelfnote static int notesize(struct memelfnote *en) { int sz; - + sz = sizeof(struct elf_note); sz += roundup(strlen(en->name), 4); sz += roundup(en->datasz, 4); - + return sz; } @@ -996,7 +1002,7 @@ static int writenote(struct memelfnote *men, struct file *file) DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ DUMP_WRITE(men->data, men->datasz); DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ - + return 1; } #undef DUMP_WRITE @@ -1034,7 +1040,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs) struct elf_prstatus prstatus; /* NT_PRSTATUS */ elf_fpregset_t fpu; /* NT_PRFPREG */ struct elf_prpsinfo psinfo; /* NT_PRPSINFO */ - + if (!current->dumpable || limit < PAGE_SIZE || current->mm->count != 1) return 0; current->dumpable = 0; @@ -1050,13 +1056,13 @@ static int elf_core_dump(long signr, struct pt_regs * regs) if (maydump(vma)) { int sz = vma->vm_end-vma->vm_start; - + if (size+sz >= limit) break; else size += sz; } - + segs++; } #ifdef DEBUG @@ -1069,7 +1075,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs) elf.e_ident[EI_DATA] = ELF_DATA; elf.e_ident[EI_VERSION] = EV_CURRENT; memset(elf.e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); - + elf.e_type = ET_CORE; elf.e_machine = ELF_ARCH; elf.e_version = EV_CURRENT; @@ -1083,7 +1089,7 @@ static int elf_core_dump(long signr, 
struct pt_regs * regs) elf.e_shentsize = 0; elf.e_shnum = 0; elf.e_shstrndx = 0; - + fs = get_fs(); set_fs(KERNEL_DS); memcpy(corefile,"core.",5); @@ -1131,8 +1137,8 @@ static int elf_core_dump(long signr, struct pt_regs * regs) notes[0].datasz = sizeof(prstatus); notes[0].data = &prstatus; prstatus.pr_info.si_signo = prstatus.pr_cursig = signr; - copy_sigbits32(&prstatus.pr_sigpend, current->signal); - copy_sigbits32(&prstatus.pr_sighold, current->blocked); + prstatus.pr_sigpend = current->signal; + prstatus.pr_sighold = current->blocked; psinfo.pr_pid = prstatus.pr_pid = current->pid; psinfo.pr_ppid = prstatus.pr_ppid = current->p_pptr->pid; psinfo.pr_pgrp = prstatus.pr_pgrp = current->pgrp; @@ -1155,14 +1161,14 @@ static int elf_core_dump(long signr, struct pt_regs * regs) #else if (sizeof(elf_gregset_t) != sizeof(struct pt_regs)) { - printk("sizeof(elf_gregset_t) (%d) != sizeof(struct pt_regs) (%d)\n", + printk("sizeof(elf_gregset_t) (%ld) != sizeof(struct pt_regs) (%ld)\n", sizeof(elf_gregset_t), sizeof(struct pt_regs)); } else *(struct pt_regs *)&prstatus.pr_reg = *regs; #endif - -#if defined (DEBUG) && defined (__i386__) + +#ifdef DEBUG dump_regs("Passed in regs", (elf_greg_t *)regs); dump_regs("prstatus regs", (elf_greg_t *)&prstatus.pr_reg); #endif @@ -1182,7 +1188,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs) int i, len; set_fs(fs); - + len = current->mm->arg_end - current->mm->arg_start; len = len >= ELF_PRARGSZ ? ELF_PRARGSZ : len; copy_from_user(&psinfo.pr_psargs, @@ -1200,7 +1206,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs) notes[2].type = NT_TASKSTRUCT; notes[2].datasz = sizeof(*current); notes[2].data = current; - + /* Try to dump the fpu. 
*/ prstatus.pr_fpvalid = dump_fpu (regs, &fpu); if (!prstatus.pr_fpvalid) @@ -1222,7 +1228,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs) for(i = 0; i < numnote; i++) sz += notesize(&notes[i]); - + phdr.p_type = PT_NOTE; phdr.p_offset = offset; phdr.p_vaddr = 0; @@ -1238,7 +1244,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs) /* Page-align dumped data */ dataoff = offset = roundup(offset, PAGE_SIZE); - + /* Write program headers for segments dump */ for(vma = current->mm->mmap, i = 0; i < segs && vma != NULL; vma = vma->vm_next) { @@ -1248,7 +1254,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs) i++; sz = vma->vm_end - vma->vm_start; - + phdr.p_type = PT_LOAD; phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; @@ -1267,20 +1273,20 @@ static int elf_core_dump(long signr, struct pt_regs * regs) for(i = 0; i < numnote; i++) if (!writenote(&notes[i], &file)) goto close_coredump; - + set_fs(fs); DUMP_SEEK(dataoff); - + for(i = 0, vma = current->mm->mmap; i < segs && vma != NULL; vma = vma->vm_next) { unsigned long addr = vma->vm_start; unsigned long len = vma->vm_end - vma->vm_start; - + + i++; if (!maydump(vma)) continue; - i++; #ifdef DEBUG printk("elf_core_dump: writing %08lx %lx\n", addr, len); #endif @@ -1307,14 +1313,14 @@ static int elf_core_dump(long signr, struct pt_regs * regs) } #endif /* USE_ELF_CORE_DUMP */ -int init_elf_binfmt(void) +__initfunc(int init_elf_binfmt(void)) { return register_binfmt(&elf_format); } #ifdef MODULE -int init_module(void) +int init_module(void) { /* Install the COFF, ELF and XOUT loaders. * N.B. We *rely* on the table being the right size with the @@ -1324,7 +1330,7 @@ int init_module(void) } -void cleanup_module( void) +void cleanup_module( void) { /* Remove the COFF and ELF loaders.
*/ unregister_binfmt(&elf_format); diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c new file mode 100644 index 000000000..7bd71f212 --- /dev/null +++ b/fs/binfmt_em86.c @@ -0,0 +1,123 @@ +/* + * linux/fs/binfmt_em86.c + * + * Based on linux/fs/binfmt_script.c + * Copyright (C) 1996 Martin von Löwis + * original #!-checking implemented by tytso. + * + * em86 changes Copyright (C) 1997 Jim Paradis + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/malloc.h> +#include <linux/binfmts.h> +#include <linux/elf.h> +#include <linux/init.h> + + +#define EM86_INTERP "/usr/bin/em86" +#define EM86_I_NAME "em86" + +static int do_load_em86(struct linux_binprm *bprm,struct pt_regs *regs) +{ + char *interp, *i_name, *i_arg; + int retval; + struct elfhdr elf_ex; + + /* Make sure this is a Linux/Intel ELF executable... */ + elf_ex = *((struct elfhdr *)bprm->buf); + + if (elf_ex.e_ident[0] != 0x7f || + strncmp(&elf_ex.e_ident[1], "ELF",3) != 0) { + return -ENOEXEC; + } + + + /* First of all, some simple consistency checks */ + if ((elf_ex.e_type != ET_EXEC && + elf_ex.e_type != ET_DYN) || + (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || + (!bprm->inode->i_op || !bprm->inode->i_op->default_file_ops || + !bprm->inode->i_op->default_file_ops->mmap)){ + return -ENOEXEC; + } + + bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + iput(bprm->inode); + bprm->dont_iput = 1; + + /* Unlike in the script case, we don't have to do any hairy + * parsing to find our interpreter... it's hardcoded! + */ + interp = EM86_INTERP; + i_name = EM86_I_NAME; + i_arg = NULL; /* We reserve the right to add an arg later */ + + /* + * Splice in (1) the interpreter's name for argv[0] + * (2) (optional) argument to interpreter + * (3) filename of emulated file (replace argv[0]) + * + * This is done in reverse order, because of how the + * user environment and arguments are stored. 
+ */ + remove_arg_zero(bprm); + bprm->p = copy_strings(1, &bprm->filename, bprm->page, bprm->p, 2); + bprm->argc++; + if (i_arg) { + bprm->p = copy_strings(1, &i_arg, bprm->page, bprm->p, 2); + bprm->argc++; + } + bprm->p = copy_strings(1, &i_name, bprm->page, bprm->p, 2); + bprm->argc++; + if (!bprm->p) + return -E2BIG; + /* + * OK, now restart the process with the interpreter's inode. + * Note that we use open_namei() as the name is now in kernel + * space, and we don't need to copy it. + */ + retval = open_namei(interp, 0, 0, &bprm->inode, NULL); + if (retval) + return retval; + bprm->dont_iput=0; + retval=prepare_binprm(bprm); + if(retval<0) + return retval; + return search_binary_handler(bprm,regs); +} + +static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) +{ + int retval; + MOD_INC_USE_COUNT; + retval = do_load_em86(bprm,regs); + MOD_DEC_USE_COUNT; + return retval; +} + +struct linux_binfmt em86_format = { +#ifndef MODULE + NULL, 0, load_em86, NULL, NULL +#else + NULL, &__this_module, load_em86, NULL, NULL +#endif +}; + +__initfunc(int init_em86_binfmt(void)) +{ + return register_binfmt(&em86_format); +} + +#ifdef MODULE +int init_module(void) +{ + return init_em86_binfmt(); +} + +void cleanup_module( void) { + unregister_binfmt(&em86_format); +} +#endif diff --git a/fs/binfmt_java.c b/fs/binfmt_java.c index 6b75fa830..fcf664c5d 100644 --- a/fs/binfmt_java.c +++ b/fs/binfmt_java.c @@ -13,6 +13,7 @@ #include <linux/stat.h> #include <linux/malloc.h> #include <linux/binfmts.h> +#include <linux/init.h> #define _PATH_JAVA "/usr/bin/java" #define _PATH_APPLET "/usr/bin/appletviewer" @@ -144,7 +145,7 @@ static struct linux_binfmt java_format = { #ifndef MODULE NULL, 0, load_java, NULL, NULL #else - NULL, &mod_use_count_, load_java, NULL, NULL + NULL, &__this_module, load_java, NULL, NULL #endif }; @@ -161,11 +162,12 @@ static struct linux_binfmt applet_format = { #ifndef MODULE NULL, 0, load_applet, NULL, NULL #else - NULL, &mod_use_count_, 
load_applet, NULL, NULL + NULL, &__this_module, load_applet, NULL, NULL #endif }; -int init_java_binfmt(void) { +__initfunc(int init_java_binfmt(void)) +{ register_binfmt(&java_format); return register_binfmt(&applet_format); } diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 9050a106c..1bd2f0d10 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -10,6 +10,7 @@ #include <linux/stat.h> #include <linux/malloc.h> #include <linux/binfmts.h> +#include <linux/init.h> static int do_load_script(struct linux_binprm *bprm,struct pt_regs *regs) { @@ -76,7 +77,6 @@ static int do_load_script(struct linux_binprm *bprm,struct pt_regs *regs) /* * OK, now restart the process with the interpreter's inode. */ - bprm->filename = interp; retval = open_namei(interp, 0, 0, &bprm->inode, NULL); if (retval) return retval; @@ -100,11 +100,12 @@ struct linux_binfmt script_format = { #ifndef MODULE NULL, 0, load_script, NULL, NULL #else - NULL, &mod_use_count_, load_script, NULL, NULL + NULL, &__this_module, load_script, NULL, NULL #endif }; -int init_script_binfmt(void) { +__initfunc(int init_script_binfmt(void)) +{ return register_binfmt(&script_format); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 4d802f4e3..f42026ac2 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -23,19 +23,16 @@ extern int *blksize_size[]; long block_write(struct inode * inode, struct file * filp, const char * buf, unsigned long count) { - int blocksize, blocksize_bits, i, j, buffercount,write_error; + int blocksize, blocksize_bits, i, buffercount,write_error; int block, blocks; loff_t offset; int chars; int written = 0; - int cluster_list[MAX_BUF_PER_PAGE]; struct buffer_head * bhlist[NBUF]; - int blocks_per_cluster; unsigned int size; kdev_t dev; struct buffer_head * bh, *bufferlist[NBUF]; register char * p; - int excess; write_error = buffercount = 0; dev = inode->i_rdev; @@ -52,8 +49,6 @@ long block_write(struct inode * inode, struct file * filp, i >>= 1; } - blocks_per_cluster = 
PAGE_SIZE / blocksize; - block = filp->f_pos >> blocksize_bits; offset = filp->f_pos & (blocksize-1); @@ -69,15 +64,14 @@ long block_write(struct inode * inode, struct file * filp, chars=count; #if 0 - if (chars == blocksize) - bh = getblk(dev, block, blocksize); - else - bh = breada(dev,block,block+1,block+2,-1); - + /* get the buffer head */ + { + struct buffer_head * (*fn)(kdev_t, int, int) = getblk; + if (chars != blocksize) + fn = bread; + bh = fn(dev, block, blocksize); + } #else - for(i=0; i<blocks_per_cluster; i++) cluster_list[i] = block+i; - if((block % blocks_per_cluster) == 0) - generate_cluster(dev, cluster_list, blocksize); bh = getblk(dev, block, blocksize); if (chars != blocksize && !buffer_uptodate(bh)) { @@ -91,15 +85,8 @@ long block_write(struct inode * inode, struct file * filp, blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2; if (block + blocks > size) blocks = size - block; if (blocks > NBUF) blocks=NBUF; - excess = (block + blocks) % blocks_per_cluster; - if ( blocks > excess ) - blocks -= excess; bhlist[0] = bh; for(i=1; i<blocks; i++){ - if(((i+block) % blocks_per_cluster) == 0) { - for(j=0; j<blocks_per_cluster; j++) cluster_list[j] = block+i+j; - generate_cluster(dev, cluster_list, blocksize); - }; bhlist[i] = getblk (dev, block+i, blocksize); if(!bhlist[i]){ while(i >= 0) brelse(bhlist[i--]); @@ -167,8 +154,6 @@ long block_read(struct inode * inode, struct file * filp, int blocksize_bits, i; unsigned int blocks, rblocks, left; int bhrequest, uptodate; - int cluster_list[MAX_BUF_PER_PAGE]; - int blocks_per_cluster; struct buffer_head ** bhb, ** bhe; struct buffer_head * buflist[NBUF]; struct buffer_head * bhreq[NBUF]; @@ -176,7 +161,6 @@ long block_read(struct inode * inode, struct file * filp, loff_t size; kdev_t dev; int read; - int excess; dev = inode->i_rdev; blocksize = BLOCK_SIZE; @@ -195,8 +179,6 @@ long block_read(struct inode * inode, struct file * filp, else size = INT_MAX; - blocks_per_cluster = PAGE_SIZE / blocksize; - 
if (offset > size) left = 0; /* size - offset might not fit into left, so check explicitly. */ @@ -217,9 +199,6 @@ long block_read(struct inode * inode, struct file * filp, if (filp->f_reada) { if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9)) blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9); - excess = (block + blocks) % blocks_per_cluster; - if ( blocks > excess ) - blocks -= excess; if (rblocks > blocks) blocks = rblocks; @@ -242,12 +221,6 @@ long block_read(struct inode * inode, struct file * filp, uptodate = 1; while (blocks) { --blocks; -#if 1 - if((block % blocks_per_cluster) == 0) { - for(i=0; i<blocks_per_cluster; i++) cluster_list[i] = block+i; - generate_cluster(dev, cluster_list, blocksize); - } -#endif *bhb = getblk(dev, block++, blocksize); if (*bhb && !buffer_uptodate(*bhb)) { uptodate = 0; @@ -268,7 +241,6 @@ long block_read(struct inode * inode, struct file * filp, /* Now request them all */ if (bhrequest) { ll_rw_block(READ, bhrequest, bhreq); - refill_freelist(blocksize); } do { /* Finish off all I/O that has actually completed */ diff --git a/fs/buffer.c b/fs/buffer.c index 043e35b6c..27950290a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -10,13 +10,16 @@ * data, of course), but instead letting the caller do it. */ -/* - * NOTE! There is one discordant note here: checking floppies for - * disk change. This is where it fits best, I think, as it should - * invalidate changed floppy-disk-caches. - */ - /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */ +/* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */ + +/* Removed a lot of unnecessary code and simplified things now that + * the buffer cache isn't our primary cache - Andrew Tridgell 12/96 + */ + +/* Speed up hash, lru, and free list operations. Use gfp() for allocating + * hash table, use SLAB cache for buffer heads. 
-DaveM + */ #include <linux/sched.h> #include <linux/kernel.h> @@ -25,12 +28,14 @@ #include <linux/locks.h> #include <linux/errno.h> #include <linux/malloc.h> +#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/swapctl.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/vmalloc.h> +#include <linux/blkdev.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -40,39 +45,37 @@ #define NR_SIZES 5 static char buffersize_index[17] = {-1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4}; -static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192}; #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9]) #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) +#define MAX_UNUSED_BUFFERS 30 /* don't ever have more than this number of + unused buffer heads */ +#define HASH_PAGES 4 /* number of pages to use for the hash table */ +#define HASH_PAGES_ORDER 2 +#define NR_HASH (HASH_PAGES*PAGE_SIZE/sizeof(struct buffer_head *)) +#define HASH_MASK (NR_HASH-1) static int grow_buffers(int pri, int size); -static int shrink_specific_buffers(unsigned int priority, int size); -static int maybe_shrink_lav_buffers(int); -static int nr_hash = 0; /* Size of hash table */ static struct buffer_head ** hash_table; static struct buffer_head * lru_list[NR_LIST] = {NULL, }; -/* next_to_age is an array of pointers into the lru lists, used to - cycle through the buffers aging their contents when deciding which - buffers to discard when more memory is needed */ -static struct buffer_head * next_to_age[NR_LIST] = {NULL, }; static struct buffer_head * free_list[NR_SIZES] = {NULL, }; +static kmem_cache_t *bh_cachep; + static struct buffer_head * unused_list = NULL; -struct buffer_head * reuse_list = NULL; +static struct buffer_head * reuse_list = NULL; static struct wait_queue * buffer_wait = NULL; -int nr_buffers = 0; -int nr_buffers_type[NR_LIST] = {0,}; -int nr_buffers_size[NR_SIZES] = {0,}; -int 
nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},}; -int buffer_usage[NR_SIZES] = {0,}; /* Usage counts used to determine load average */ -int buffers_lav[NR_SIZES] = {0,}; /* Load average of buffer usage */ -int nr_free[NR_SIZES] = {0,}; +static int nr_buffers = 0; +static int nr_buffers_type[NR_LIST] = {0,}; +static int nr_buffer_heads = 0; +static int nr_unused_buffer_heads = 0; +static int refilled = 0; /* Set NZ when a buffer freelist is refilled + this is used by the loop device */ + +/* This is used by some architectures to estimate available memory. */ int buffermem = 0; -int nr_buffer_heads = 0; -int refilled = 0; /* Set NZ when a buffer freelist is refilled */ -extern int *blksize_size[]; /* Here is the parameter block for the bdflush process. If you add or * remove any of the parameters, make sure to update kernel/sysctl.c. @@ -81,8 +84,10 @@ extern int *blksize_size[]; static void wakeup_bdflush(int); #define N_PARAM 9 -#define LAV +/* The dummy values in this structure are left in there for compatibility + * with old programs that play with the /proc entries. + */ union bdflush_param{ struct { int nfract; /* Percentage of buffer cache dirty to @@ -93,26 +98,17 @@ union bdflush_param{ each time we call refill */ int nref_dirt; /* Dirty buffer threshold for activating bdflush when trying to refill buffers. 
*/ - int clu_nfract; /* Percentage of buffer cache to scan to - search for free clusters */ + int dummy1; /* unused */ int age_buffer; /* Time for normal buffer to age before we flush it */ int age_super; /* Time for superblock to age before we flush it */ - int lav_const; /* Constant used for load average (time - constant */ - int lav_ratio; /* Used to determine how low a lav for a - particular size can go before we start to - trim back the buffers */ + int dummy2; /* unused */ + int dummy3; /* unused */ } b_un; unsigned int data[N_PARAM]; } bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}}; -/* The lav constant is set for 1 minute, as long as the update process runs - every 5 seconds. If you change the frequency of update, the time - constant will also change. */ - - /* These are the min and max parameter values that we will allow to be assigned */ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 100, 100, 1, 1}; int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5}; @@ -145,29 +141,31 @@ repeat: } /* Call sync_buffers with wait!=0 to ensure that the call does not - return until all buffer writes have completed. Sync() may return - before the writes have finished; fsync() may not. */ - + * return until all buffer writes have completed. Sync() may return + * before the writes have finished; fsync() may not. + */ /* Godamity-damn. Some buffers (bitmaps for filesystems) - spontaneously dirty themselves without ever brelse being called. - We will ultimately want to put these in a separate list, but for - now we search all of the lists for dirty buffers */ - + * spontaneously dirty themselves without ever brelse being called. + * We will ultimately want to put these in a separate list, but for + * now we search all of the lists for dirty buffers. 
+ */ static int sync_buffers(kdev_t dev, int wait) { int i, retry, pass = 0, err = 0; struct buffer_head * bh, *next; /* One pass for no-wait, three for wait: - 0) write out all dirty, unlocked buffers; - 1) write out all dirty buffers, waiting if locked; - 2) wait for completion by waiting for all buffers to unlock. */ + * 0) write out all dirty, unlocked buffers; + * 1) write out all dirty buffers, waiting if locked; + * 2) wait for completion by waiting for all buffers to unlock. + */ do { retry = 0; repeat: - /* We search all lists as a failsafe mechanism, not because we expect - there to be dirty buffers on any of the other lists. */ + /* We search all lists as a failsafe mechanism, not because we expect + * there to be dirty buffers on any of the other lists. + */ bh = lru_list[BUF_DIRTY]; if (!bh) goto repeat2; @@ -181,7 +179,8 @@ repeat: continue; if (buffer_locked(bh)) { /* Buffer is locked; skip it unless wait is - requested AND pass > 0. */ + * requested AND pass > 0. + */ if (!wait || !pass) { retry = 1; continue; @@ -189,18 +188,27 @@ repeat: wait_on_buffer (bh); goto repeat; } + /* If an unlocked buffer is not uptodate, there has - been an IO error. Skip it. */ + * been an IO error. Skip it. + */ if (wait && buffer_req(bh) && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_uptodate(bh)) { err = 1; continue; } + /* Don't write clean buffers. Don't write ANY buffers - on the third pass. */ + * on the third pass. + */ if (!buffer_dirty(bh) || pass >= 2) continue; - /* don't bother about locked buffers */ + + /* Don't bother about locked buffers. + * + * XXX We checked if it was locked above and there is no + * XXX way we could have slept in between. -DaveM + */ if (buffer_locked(bh)) continue; bh->b_count++; @@ -226,7 +234,8 @@ repeat: continue; if (buffer_locked(bh)) { /* Buffer is locked; skip it unless wait is - requested AND pass > 0. */ + * requested AND pass > 0. 
+ */ if (!wait || !pass) { retry = 1; continue; @@ -236,10 +245,11 @@ repeat: } } - /* If we are waiting for the sync to succeed, and if any dirty - blocks were written, then repeat; on the second pass, only - wait for buffers being written (do not pass to write any - more buffers on the second pass). */ + /* If we are waiting for the sync to succeed, and if any dirty + * blocks were written, then repeat; on the second pass, only + * wait for buffers being written (do not pass to write any + * more buffers on the second pass). + */ } while (wait && retry && ++pass<=2); return err; } @@ -264,7 +274,9 @@ int fsync_dev(kdev_t dev) asmlinkage int sys_sync(void) { + lock_kernel(); fsync_dev(0); + unlock_kernel(); return 0; } @@ -277,29 +289,39 @@ asmlinkage int sys_fsync(unsigned int fd) { struct file * file; struct inode * inode; + int err = 0; + lock_kernel(); if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) - return -EBADF; - if (!file->f_op || !file->f_op->fsync) - return -EINVAL; - if (file->f_op->fsync(inode,file)) - return -EIO; - return 0; + err = -EBADF; + else if (!file->f_op || !file->f_op->fsync) + err = -EINVAL; + else if (file->f_op->fsync(inode,file)) + err = -EIO; + unlock_kernel(); + return err; } asmlinkage int sys_fdatasync(unsigned int fd) { struct file * file; struct inode * inode; + int err = -EBADF; + lock_kernel(); if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) - return -EBADF; + goto out; + err = -EINVAL; if (!file->f_op || !file->f_op->fsync) - return -EINVAL; + goto out; /* this needs further work, at the moment it is identical to fsync() */ if (file->f_op->fsync(inode,file)) - return -EIO; - return 0; + err = -EIO; + else + err = 0; +out: + unlock_kernel(); + return err; } void invalidate_buffers(kdev_t dev) @@ -327,18 +349,17 @@ void invalidate_buffers(kdev_t dev) } } -#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash) +#define _hashfn(dev,block) 
(((unsigned)(HASHDEV(dev)^block))&HASH_MASK) #define hash(dev,block) hash_table[_hashfn(dev,block)] static inline void remove_from_hash_queue(struct buffer_head * bh) { - if (bh->b_next) - bh->b_next->b_prev = bh->b_prev; - if (bh->b_prev) - bh->b_prev->b_next = bh->b_next; - if (hash(bh->b_dev,bh->b_blocknr) == bh) - hash(bh->b_dev,bh->b_blocknr) = bh->b_next; - bh->b_next = bh->b_prev = NULL; + if (bh->b_pprev) { + if(bh->b_next) + bh->b_next->b_pprev = bh->b_pprev; + *bh->b_pprev = bh->b_next; + bh->b_pprev = NULL; + } } static inline void remove_from_lru_list(struct buffer_head * bh) @@ -354,11 +375,6 @@ static inline void remove_from_lru_list(struct buffer_head * bh) lru_list[bh->b_list] = bh->b_next_free; if (lru_list[bh->b_list] == bh) lru_list[bh->b_list] = NULL; - if (next_to_age[bh->b_list] == bh) - next_to_age[bh->b_list] = bh->b_next_free; - if (next_to_age[bh->b_list] == bh) - next_to_age[bh->b_list] = NULL; - bh->b_next_free = bh->b_prev_free = NULL; } @@ -371,7 +387,6 @@ static inline void remove_from_free_list(struct buffer_head * bh) panic("Free list corrupted"); if(!free_list[isize]) panic("Free list empty"); - nr_free[isize]--; if(bh->b_next_free == bh) free_list[isize] = NULL; else { @@ -391,58 +406,55 @@ static inline void remove_from_queues(struct buffer_head * bh) return; } nr_buffers_type[bh->b_list]--; - nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--; remove_from_hash_queue(bh); remove_from_lru_list(bh); } static inline void put_last_lru(struct buffer_head * bh) { - if (!bh) - return; - if (bh == lru_list[bh->b_list]) { - lru_list[bh->b_list] = bh->b_next_free; - if (next_to_age[bh->b_list] == bh) - next_to_age[bh->b_list] = bh->b_next_free; - return; - } - if(bh->b_dev == B_FREE) - panic("Wrong block for lru list"); - remove_from_lru_list(bh); -/* add to back of free list */ + if (bh) { + struct buffer_head **bhp = &lru_list[bh->b_list]; - if(!lru_list[bh->b_list]) { - lru_list[bh->b_list] = bh; - lru_list[bh->b_list]->b_prev_free 
= bh; - } - if (!next_to_age[bh->b_list]) - next_to_age[bh->b_list] = bh; + if (bh == *bhp) { + *bhp = bh->b_next_free; + return; + } + + if(bh->b_dev == B_FREE) + panic("Wrong block for lru list"); + + /* Add to back of free list. */ + remove_from_lru_list(bh); + if(!*bhp) { + *bhp = bh; + (*bhp)->b_prev_free = bh; + } - bh->b_next_free = lru_list[bh->b_list]; - bh->b_prev_free = lru_list[bh->b_list]->b_prev_free; - lru_list[bh->b_list]->b_prev_free->b_next_free = bh; - lru_list[bh->b_list]->b_prev_free = bh; + bh->b_next_free = *bhp; + bh->b_prev_free = (*bhp)->b_prev_free; + (*bhp)->b_prev_free->b_next_free = bh; + (*bhp)->b_prev_free = bh; + } } static inline void put_last_free(struct buffer_head * bh) { - int isize; - if (!bh) - return; + if (bh) { + struct buffer_head **bhp = &free_list[BUFSIZE_INDEX(bh->b_size)]; - isize = BUFSIZE_INDEX(bh->b_size); - bh->b_dev = B_FREE; /* So it is obvious we are on the free list */ - /* add to back of free list */ - if(!free_list[isize]) { - free_list[isize] = bh; - bh->b_prev_free = bh; - } + bh->b_dev = B_FREE; /* So it is obvious we are on the free list. */ - nr_free[isize]++; - bh->b_next_free = free_list[isize]; - bh->b_prev_free = free_list[isize]->b_prev_free; - free_list[isize]->b_prev_free->b_next_free = bh; - free_list[isize]->b_prev_free = bh; + /* Add to back of free list. 
*/ + if(!*bhp) { + *bhp = bh; + bh->b_prev_free = bh; + } + + bh->b_next_free = *bhp; + bh->b_prev_free = (*bhp)->b_prev_free; + (*bhp)->b_prev_free->b_next_free = bh; + (*bhp)->b_prev_free = bh; + } } static inline void insert_into_queues(struct buffer_head * bh) @@ -450,30 +462,34 @@ static inline void insert_into_queues(struct buffer_head * bh) /* put at end of free list */ if(bh->b_dev == B_FREE) { put_last_free(bh); - return; - } - if(!lru_list[bh->b_list]) { - lru_list[bh->b_list] = bh; - bh->b_prev_free = bh; + } else { + struct buffer_head **bhp = &lru_list[bh->b_list]; + + if(!*bhp) { + *bhp = bh; + bh->b_prev_free = bh; + } + + if (bh->b_next_free) + panic("VFS: buffer LRU pointers corrupted"); + + bh->b_next_free = *bhp; + bh->b_prev_free = (*bhp)->b_prev_free; + (*bhp)->b_prev_free->b_next_free = bh; + (*bhp)->b_prev_free = bh; + + nr_buffers_type[bh->b_list]++; + + /* Put the buffer in new hash-queue if it has a device. */ + if (bh->b_dev) { + struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr); + if((bh->b_next = *bhp) != NULL) + (*bhp)->b_pprev = &bh->b_next; + *bhp = bh; + bh->b_pprev = bhp; /* Exists in bh hashes. */ + } else + bh->b_pprev = NULL; /* Not in bh hashes. 
*/ } - if (!next_to_age[bh->b_list]) - next_to_age[bh->b_list] = bh; - if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted"); - bh->b_next_free = lru_list[bh->b_list]; - bh->b_prev_free = lru_list[bh->b_list]->b_prev_free; - lru_list[bh->b_list]->b_prev_free->b_next_free = bh; - lru_list[bh->b_list]->b_prev_free = bh; - nr_buffers_type[bh->b_list]++; - nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++; -/* put the buffer in new hash-queue if it has a device */ - bh->b_prev = NULL; - bh->b_next = NULL; - if (!(bh->b_dev)) - return; - bh->b_next = hash(bh->b_dev,bh->b_blocknr); - hash(bh->b_dev,bh->b_blocknr) = bh; - if (bh->b_next) - bh->b_next->b_prev = bh; } static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size) @@ -481,14 +497,14 @@ static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size) struct buffer_head * tmp; for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next) - if (tmp->b_blocknr == block && tmp->b_dev == dev) + if (tmp->b_blocknr == block && tmp->b_dev == dev) { if (tmp->b_size == size) return tmp; - else { - printk("VFS: Wrong blocksize on device %s\n", - kdevname(dev)); - return NULL; - } + + printk("VFS: Wrong blocksize on device %s\n", + kdevname(dev)); + return NULL; + } return NULL; } @@ -508,15 +524,36 @@ struct buffer_head * get_hash_table(kdev_t dev, int block, int size) return NULL; bh->b_count++; wait_on_buffer(bh); - if (bh->b_dev == dev && bh->b_blocknr == block - && bh->b_size == size) + if (bh->b_dev == dev && + bh->b_blocknr == block && + bh->b_size == size) return bh; bh->b_count--; } } +unsigned int get_hardblocksize(kdev_t dev) +{ + /* + * Get the hard sector size for the given device. If we don't know + * what it is, return 0. + */ + if (hardsect_size[MAJOR(dev)] != NULL) { + int blksize = hardsect_size[MAJOR(dev)][MINOR(dev)]; + if (blksize != 0) + return blksize; + } + + /* + * We don't know what the hardware sector size for this device is. 
+ * Return 0 indicating that we don't know. + */ + return 0; +} + void set_blocksize(kdev_t dev, int size) { + extern int *blksize_size[]; int i, nlist; struct buffer_head * bh, *bhnext; @@ -540,13 +577,15 @@ void set_blocksize(kdev_t dev, int size) sync_buffers(dev, 2); blksize_size[MAJOR(dev)][MINOR(dev)] = size; - /* We need to be quite careful how we do this - we are moving entries - around on the free list, and we can get in a loop if we are not careful.*/ - + /* We need to be quite careful how we do this - we are moving entries + * around on the free list, and we can get in a loop if we are not careful. + */ for(nlist = 0; nlist < NR_LIST; nlist++) { bh = lru_list[nlist]; for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) { - if(!bh) break; + if(!bh) + break; + bhnext = bh->b_next_free; if (bh->b_dev != dev) continue; @@ -565,191 +604,154 @@ void set_blocksize(kdev_t dev, int size) } } -#define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh)) - -void refill_freelist(int size) +/* Check if a buffer is OK to be reclaimed. */ +static inline int can_reclaim(struct buffer_head *bh, int size) { - struct buffer_head * bh, * tmp; - struct buffer_head * candidate[NR_LIST]; - unsigned int best_time, winner; - int isize = BUFSIZE_INDEX(size); - int buffers[NR_LIST]; - int i; - int needed; + if (bh->b_count || + buffer_protected(bh) || + buffer_locked(bh)) + return 0; + + if (atomic_read(&mem_map[MAP_NR((unsigned long) bh->b_data)].count) != 1 || + buffer_dirty(bh)) { + refile_buffer(bh); + return 0; + } - /* First see if we even need this. Sometimes it is advantageous - to request some blocks in a filesystem that we know that we will - be needing ahead of time. 
*/ + if (bh->b_size != size) + return 0; - if (nr_free[isize] > 100) - return; + return 1; +} - ++refilled; - /* If there are too many dirty buffers, we wake up the update process - now so as to ensure that there are still clean buffers available - for user processes to use (and dirty) */ +/* Find a candidate buffer to be reclaimed. */ +static struct buffer_head *find_candidate(struct buffer_head *list, + int *list_len, int size) +{ + struct buffer_head *bh; - /* We are going to try to locate this much memory */ - needed =bdf_prm.b_un.nrefill * size; + for (bh = list; + bh && (*list_len) > 0; + bh = bh->b_next_free, (*list_len)--) { + if (size != bh->b_size) { + /* This provides a mechanism for freeing blocks + * of other sizes, this is necessary now that we + * no longer have the lav code. + */ + try_to_free_buffer(bh,&bh,1); + if (!bh) + break; + continue; + } - while (nr_free_pages > min_free_pages*2 && needed > 0 && - grow_buffers(GFP_BUFFER, size)) { - needed -= PAGE_SIZE; + if (buffer_locked(bh) && + (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) { + /* Buffers are written in the order they are placed + * on the locked list. If we encounter a locked + * buffer here, this means that the rest of them + * are also locked. + */ + (*list_len) = 0; + return NULL; + } + + if (can_reclaim(bh,size)) + return bh; } - if(needed <= 0) return; + return NULL; +} + +static void refill_freelist(int size) +{ + struct buffer_head * bh; + struct buffer_head * candidate[BUF_DIRTY]; + unsigned int best_time, winner; + int buffers[BUF_DIRTY]; + int i; + int needed; - /* See if there are too many buffers of a different size. - If so, victimize them */ + refilled = 1; + /* If there are too many dirty buffers, we wake up the update process + * now so as to ensure that there are still clean buffers available + * for user processes to use (and dirty). + */ + + /* We are going to try to locate this much memory. 
*/ + needed = bdf_prm.b_un.nrefill * size; - while(maybe_shrink_lav_buffers(size)) - { - if(!grow_buffers(GFP_BUFFER, size)) break; - needed -= PAGE_SIZE; - if(needed <= 0) return; - }; + while ((nr_free_pages > min_free_pages*2) && + (needed > 0) && + grow_buffers(GFP_BUFFER, size)) + needed -= PAGE_SIZE; +repeat: /* OK, we cannot grow the buffer cache, now try to get some - from the lru list */ + * from the lru list. + * + * First set the candidate pointers to usable buffers. This + * should be quick nearly all of the time. + */ - /* First set the candidate pointers to usable buffers. This - should be quick nearly all of the time. */ + if(needed <= 0) + return; -repeat0: - for(i=0; i<NR_LIST; i++){ - if(i == BUF_DIRTY || i == BUF_SHARED || - nr_buffers_type[i] == 0) { - candidate[i] = NULL; - buffers[i] = 0; - continue; - } + for(i=0; i<BUF_DIRTY; i++) { buffers[i] = nr_buffers_type[i]; - for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--) - { - if(buffers[i] < 0) panic("Here is the problem"); - tmp = bh->b_next_free; - if (!bh) break; - - if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 || - buffer_dirty(bh)) { - refile_buffer(bh); - continue; - } - - if (bh->b_count || buffer_protected(bh) || bh->b_size != size) - continue; - - /* Buffers are written in the order they are placed - on the locked list. If we encounter a locked - buffer here, this means that the rest of them - are also locked */ - if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) { - buffers[i] = 0; - break; - } - - if (BADNESS(bh)) continue; - break; - }; - if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */ - else candidate[i] = bh; - if(candidate[i] && candidate[i]->b_count) panic("Here is the problem"); + candidate[i] = find_candidate(lru_list[i], &buffers[i], size); } - repeat: - if(needed <= 0) return; - - /* Now see which candidate wins the election */ + /* Now see which candidate wins the election. 
*/ winner = best_time = UINT_MAX; - for(i=0; i<NR_LIST; i++){ - if(!candidate[i]) continue; - if(candidate[i]->b_lru_time < best_time){ + for(i=0; i<BUF_DIRTY; i++) { + if(!candidate[i]) + continue; + if(candidate[i]->b_lru_time < best_time) { best_time = candidate[i]->b_lru_time; winner = i; } } - /* If we have a winner, use it, and then get a new candidate from that list */ + /* If we have a winner, use it, and then get a new candidate from that list. */ if(winner != UINT_MAX) { i = winner; - bh = candidate[i]; - candidate[i] = bh->b_next_free; - if(candidate[i] == bh) candidate[i] = NULL; /* Got last one */ - if (bh->b_count || bh->b_size != size) - panic("Busy buffer in candidate list\n"); - if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1) - panic("Shared buffer in candidate list\n"); - if (buffer_protected(bh)) - panic("Protected buffer in candidate list\n"); - if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n"); + while (needed>0 && (bh=candidate[i])) { + candidate[i] = bh->b_next_free; + if(candidate[i] == bh) + candidate[i] = NULL; /* Got last one */ + remove_from_queues(bh); + bh->b_dev = B_FREE; + put_last_free(bh); + needed -= bh->b_size; + buffers[i]--; + if(buffers[i] == 0) + candidate[i] = NULL; - if(bh->b_dev == B_FREE) - panic("Wrong list"); - remove_from_queues(bh); - bh->b_dev = B_FREE; - put_last_free(bh); - needed -= bh->b_size; - buffers[i]--; - if(buffers[i] < 0) panic("Here is the problem"); - - if(buffers[i] == 0) candidate[i] = NULL; - - /* Now all we need to do is advance the candidate pointer - from the winner list to the next usable buffer */ - if(candidate[i] && buffers[i] > 0){ - if(buffers[i] <= 0) panic("Here is another problem"); - for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) { - if(buffers[i] < 0) panic("Here is the problem"); - tmp = bh->b_next_free; - if (!bh) break; - - if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 || - buffer_dirty(bh)) { - refile_buffer(bh); - 
continue; - }; - - if (bh->b_count || buffer_protected(bh) || bh->b_size != size) - continue; - - /* Buffers are written in the order they are - placed on the locked list. If we encounter - a locked buffer here, this means that the - rest of them are also locked */ - if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) { - buffers[i] = 0; - break; - } - - if (BADNESS(bh)) continue; - break; - }; - if(!buffers[i]) candidate[i] = NULL; /* Nothing here */ - else candidate[i] = bh; - if(candidate[i] && candidate[i]->b_count) - panic("Here is the problem"); + if (candidate[i] && !can_reclaim(candidate[i],size)) + candidate[i] = find_candidate(candidate[i], + &buffers[i], size); } - - goto repeat; + if (needed >= 0) + goto repeat; } - if(needed <= 0) return; + if(needed <= 0) + return; /* Too bad, that was not enough. Try a little harder to grow some. */ - if (nr_free_pages > min_free_pages + 5) { if (grow_buffers(GFP_BUFFER, size)) { needed -= PAGE_SIZE; - goto repeat0; - }; + goto repeat; + } } - /* and repeat until we find something good */ + /* And repeat until we find something good. */ if (!grow_buffers(GFP_ATOMIC, size)) wakeup_bdflush(1); needed -= PAGE_SIZE; - goto repeat0; + goto repeat; } /* @@ -767,12 +769,10 @@ struct buffer_head * getblk(kdev_t dev, int block, int size) struct buffer_head * bh; int isize = BUFSIZE_INDEX(size); - /* Update this for the buffer size lav. */ - buffer_usage[isize]++; - /* If there are too many dirty buffers, we wake up the update process - now so as to ensure that there are still clean buffers available - for user processes to use (and dirty) */ + * now so as to ensure that there are still clean buffers available + * for user processes to use (and dirty). 
+ */ repeat: bh = get_hash_table(dev, block, size); if (bh) { @@ -785,7 +785,8 @@ repeat: return bh; } - while(!free_list[isize]) refill_freelist(size); + while(!free_list[isize]) + refill_freelist(size); if (find_buffer(dev,block,size)) goto repeat; @@ -793,8 +794,9 @@ repeat: bh = free_list[isize]; remove_from_free_list(bh); -/* OK, FINALLY we know that this buffer is the only one of its kind, */ -/* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */ + /* OK, FINALLY we know that this buffer is the only one of its kind, + * and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean. + */ bh->b_count=1; bh->b_flushtime=0; bh->b_state=(1<<BH_Touched); @@ -809,7 +811,7 @@ void set_writetime(struct buffer_head * buf, int flag) int newtime; if (buffer_dirty(buf)) { - /* Move buffer to dirty list if jiffies is clear */ + /* Move buffer to dirty list if jiffies is clear. */ newtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer); if(!buf->b_flushtime || buf->b_flushtime > newtime) @@ -827,7 +829,6 @@ void set_writetime(struct buffer_head * buf, int flag) void refile_buffer(struct buffer_head * buf) { int dispose; - int isize; if(buf->b_dev == B_FREE) { printk("Attempt to refile free buffer\n"); @@ -835,17 +836,14 @@ void refile_buffer(struct buffer_head * buf) } if (buffer_dirty(buf)) dispose = BUF_DIRTY; - else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf)) - dispose = BUF_SHARED; else if (buffer_locked(buf)) dispose = BUF_LOCKED; - else if (buf->b_list == BUF_SHARED) - dispose = BUF_UNSHARED; else dispose = BUF_CLEAN; - if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies; - if(dispose != buf->b_list) { - if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED) + if(dispose == BUF_CLEAN) + buf->b_lru_time = jiffies; + if(dispose != buf->b_list) { + if(dispose == BUF_DIRTY) buf->b_lru_time = jiffies; if(dispose == BUF_LOCKED && (buf->b_flushtime - buf->b_lru_time) <= 
bdf_prm.b_un.age_super) @@ -854,19 +852,21 @@ void refile_buffer(struct buffer_head * buf) buf->b_list = dispose; insert_into_queues(buf); if (dispose == BUF_DIRTY) { - /* This buffer is dirty, maybe we need to start flushing. */ - /* If too high a percentage of the buffers are dirty... */ - if (nr_buffers_type[BUF_DIRTY] > - (nr_buffers - nr_buffers_type[BUF_SHARED]) * - bdf_prm.b_un.nfract/100) - wakeup_bdflush(0); - /* If this is a loop device, and - * more than half of the buffers of this size are dirty... */ - /* (Prevents no-free-buffers deadlock with loop device.) */ - isize = BUFSIZE_INDEX(buf->b_size); - if (MAJOR(buf->b_dev) == LOOP_MAJOR && - nr_buffers_st[isize][BUF_DIRTY]*2>nr_buffers_size[isize]) - wakeup_bdflush(1); + int too_many = (nr_buffers * bdf_prm.b_un.nfract/100); + + /* This buffer is dirty, maybe we need to start flushing. + * If too high a percentage of the buffers are dirty... + */ + if (nr_buffers_type[BUF_DIRTY] > too_many) + wakeup_bdflush(0); + + /* If this is a loop device, and + * more than half of the buffers are dirty... + * (Prevents no-free-buffers deadlock with loop device.) + */ + if (MAJOR(buf->b_dev) == LOOP_MAJOR && + nr_buffers_type[BUF_DIRTY]*2>nr_buffers) + wakeup_bdflush(1); } } } @@ -878,7 +878,7 @@ void __brelse(struct buffer_head * buf) { wait_on_buffer(buf); - /* If dirty, mark the time this buffer should be written back */ + /* If dirty, mark the time this buffer should be written back. */ set_writetime(buf, 0); refile_buffer(buf); @@ -977,13 +977,13 @@ struct buffer_head * breada(kdev_t dev, int block, int bufsize, else bhlist[j++] = bh; } - /* Request the read for these buffers, and then release them */ + /* Request the read for these buffers, and then release them. */ if (j>1) ll_rw_block(READA, (j-1), bhlist+1); for(i=1; i<j; i++) brelse(bhlist[i]); - /* Wait for this buffer, and then continue on */ + /* Wait for this buffer, and then continue on. 
*/ bh = bhlist[0]; wait_on_buffer(bh); if (buffer_uptodate(bh)) @@ -992,11 +992,15 @@ struct buffer_head * breada(kdev_t dev, int block, int bufsize, return NULL; } -/* - * See fs/inode.c for the weird use of volatile.. - */ static void put_unused_buffer_head(struct buffer_head * bh) { + if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { + nr_buffer_heads--; + kmem_cache_free(bh_cachep, bh); + return; + } + memset(bh,0,sizeof(*bh)); + nr_unused_buffer_heads++; bh->b_next_free = unused_list; unused_list = bh; wake_up(&buffer_wait); @@ -1004,24 +1008,20 @@ static void put_unused_buffer_head(struct buffer_head * bh) static void get_more_buffer_heads(void) { - int i; struct buffer_head * bh; - for (;;) { - if (unused_list) - return; - - /* - * This is critical. We can't swap out pages to get + while (!unused_list) { + /* This is critical. We can't swap out pages to get * more buffer heads, because the swap-out may need - * more buffer-heads itself. Thus GFP_ATOMIC. + * more buffer-heads itself. Thus SLAB_ATOMIC. */ - bh = (struct buffer_head *) get_free_page(GFP_ATOMIC); - if (bh) - break; + if((bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC)) != NULL) { + put_unused_buffer_head(bh); + nr_buffer_heads++; + return; + } - /* - * Uhhuh. We're _really_ low on memory. Now we just + /* Uhhuh. We're _really_ low on memory. Now we just * wait for old buffer heads to become free due to * finishing IO.. 
*/ @@ -1029,10 +1029,6 @@ static void get_more_buffer_heads(void) sleep_on(&buffer_wait); } - for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) { - bh->b_next_free = unused_list; /* only make link */ - unused_list = bh++; - } } /* @@ -1051,17 +1047,15 @@ static void get_more_buffer_heads(void) static inline void recover_reusable_buffer_heads(void) { if (reuse_list) { - struct buffer_head *bh; - unsigned long flags; + struct buffer_head *head; + + head = xchg(&reuse_list, NULL); - save_flags(flags); do { - cli(); - bh = reuse_list; - reuse_list = bh->b_next_free; - restore_flags(flags); + struct buffer_head *bh = head; + head = head->b_next_free; put_unused_buffer_head(bh); - } while (reuse_list); + } while (head); } } @@ -1075,6 +1069,7 @@ static struct buffer_head * get_unused_buffer_head(void) return NULL; bh = unused_list; unused_list = bh->b_next_free; + nr_unused_buffer_heads--; return bh; } @@ -1351,7 +1346,7 @@ int generic_readpage(struct inode * inode, struct page * page) int *p, nr[PAGE_SIZE/512]; int i; - page->count++; + atomic_inc(&page->count); set_bit(PG_locked, &page->flags); set_bit(PG_free_after, &page->flags); @@ -1400,7 +1395,6 @@ static int grow_buffers(int pri, int size) tmp = bh; while (1) { - nr_free[isize]++; if (insert_point) { tmp->b_next_free = insert_point->b_next_free; tmp->b_prev_free = insert_point; @@ -1412,7 +1406,6 @@ static int grow_buffers(int pri, int size) } insert_point = tmp; ++nr_buffers; - ++nr_buffers_size[isize]; if (tmp->b_this_page) tmp = tmp->b_this_page; else @@ -1442,7 +1435,6 @@ int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp, { unsigned long page; struct buffer_head * tmp, * p; - int isize = BUFSIZE_INDEX(bh->b_size); *bhp = bh; page = (unsigned long) bh->b_data; @@ -1464,193 +1456,20 @@ int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp, p = tmp; tmp = tmp->b_this_page; nr_buffers--; - nr_buffers_size[isize]--; - if (p == *bhp) - { - *bhp = p->b_prev_free; - 
if (p == *bhp) /* Was this the last in the list? */ - *bhp = NULL; - } + if (p == *bhp) { + *bhp = p->b_prev_free; + if (p == *bhp) /* Was this the last in the list? */ + *bhp = NULL; + } remove_from_queues(p); put_unused_buffer_head(p); } while (tmp != bh); buffermem -= PAGE_SIZE; mem_map[MAP_NR(page)].buffers = NULL; free_page(page); - return !mem_map[MAP_NR(page)].count; + return !atomic_read(&mem_map[MAP_NR(page)].count); } -/* Age buffers on a given page, according to whether they have been - visited recently or not. */ -static inline void age_buffer(struct buffer_head *bh) -{ - struct buffer_head *tmp = bh; - int touched = 0; - - /* - * When we age a page, we mark all other buffers in the page - * with the "has_aged" flag. Then, when these aliased buffers - * come up for aging, we skip them until next pass. This - * ensures that a page full of multiple buffers only gets aged - * once per pass through the lru lists. - */ - if (clear_bit(BH_Has_aged, &bh->b_state)) - return; - - do { - touched |= clear_bit(BH_Touched, &tmp->b_state); - tmp = tmp->b_this_page; - set_bit(BH_Has_aged, &tmp->b_state); - } while (tmp != bh); - clear_bit(BH_Has_aged, &bh->b_state); - - if (touched) - touch_page(mem_map + MAP_NR((unsigned long) bh->b_data)); - else - age_page(mem_map + MAP_NR((unsigned long) bh->b_data)); -} - -/* - * Consult the load average for buffers and decide whether or not - * we should shrink the buffers of one size or not. If we decide yes, - * do it and return 1. Else return 0. Do not attempt to shrink size - * that is specified. - * - * I would prefer not to use a load average, but the way things are now it - * seems unavoidable. The way to get rid of it would be to force clustering - * universally, so that when we reclaim buffers we always reclaim an entire - * page. Doing this would mean that we all need to move towards QMAGIC. 
- */ - -static int maybe_shrink_lav_buffers(int size) -{ - int nlist; - int isize; - int total_lav, total_n_buffers, n_sizes; - - /* Do not consider the shared buffers since they would not tend - to have getblk called very often, and this would throw off - the lav. They are not easily reclaimable anyway (let the swapper - make the first move). */ - - total_lav = total_n_buffers = n_sizes = 0; - for(nlist = 0; nlist < NR_SIZES; nlist++) - { - total_lav += buffers_lav[nlist]; - if(nr_buffers_size[nlist]) n_sizes++; - total_n_buffers += nr_buffers_size[nlist]; - total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; - } - - /* See if we have an excessive number of buffers of a particular - size - if so, victimize that bunch. */ - - isize = (size ? BUFSIZE_INDEX(size) : -1); - - if (n_sizes > 1) - for(nlist = 0; nlist < NR_SIZES; nlist++) - { - if(nlist == isize) continue; - if(nr_buffers_size[nlist] && - bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < - total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED])) - if(shrink_specific_buffers(6, bufferindex_size[nlist])) - return 1; - } - return 0; -} - -/* - * Try to free up some pages by shrinking the buffer-cache - * - * Priority tells the routine how hard to try to shrink the - * buffers: 6 means "don't bother too much", while a value - * of 0 means "we'd better get some free pages now". - * - * "limit" is meant to limit the shrink-action only to pages - * that are in the 0 - limit address range, for DMA re-allocations. - * We ignore that right now. - */ - -static int shrink_specific_buffers(unsigned int priority, int size) -{ - struct buffer_head *bh; - int nlist; - int i, isize, isize1; - -#ifdef DEBUG - if(size) printk("Shrinking buffers of size %d\n", size); -#endif - /* First try the free lists, and see if we can get a complete page - from here */ - isize1 = (size ? 
BUFSIZE_INDEX(size) : -1); - - for(isize = 0; isize<NR_SIZES; isize++){ - if(isize1 != -1 && isize1 != isize) continue; - bh = free_list[isize]; - if(!bh) continue; - for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) { - if (bh->b_count || buffer_protected(bh) || - !bh->b_this_page) - continue; - if (!age_of((unsigned long) bh->b_data) && - try_to_free_buffer(bh, &bh, 6)) - return 1; - if(!bh) break; - /* Some interrupt must have used it after we - freed the page. No big deal - keep looking */ - } - } - - /* Not enough in the free lists, now try the lru list */ - - for(nlist = 0; nlist < NR_LIST; nlist++) { - repeat1: - if(priority > 2 && nlist == BUF_SHARED) continue; - i = nr_buffers_type[nlist]; - i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority; - for ( ; i > 0; i-- ) { - bh = next_to_age[nlist]; - if (!bh) - break; - next_to_age[nlist] = bh->b_next_free; - - /* First, age the buffer. */ - age_buffer(bh); - /* We may have stalled while waiting for I/O - to complete. */ - if(bh->b_list != nlist) goto repeat1; - if (bh->b_count || buffer_protected(bh) || - !bh->b_this_page) - continue; - if(size && bh->b_size != size) continue; - if (buffer_locked(bh)) - if (priority) - continue; - else - wait_on_buffer(bh); - if (buffer_dirty(bh)) { - bh->b_count++; - bh->b_flushtime = 0; - ll_rw_block(WRITEA, 1, &bh); - bh->b_count--; - continue; - } - /* At priority 6, only consider really old - (age==0) buffers for reclaiming. At - priority 0, consider any buffers. 
*/ - if ((age_of((unsigned long) bh->b_data) >> - (6-priority)) > 0) - continue; - if (try_to_free_buffer(bh, &bh, 0)) - return 1; - if(!bh) break; - } - } - return 0; -} - - /* ================== Debugging =================== */ void show_buffers(void) @@ -1658,17 +1477,18 @@ void show_buffers(void) struct buffer_head * bh; int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; int protected = 0; - int shared; - int nlist, isize; + int nlist; + static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","LOCKED1","DIRTY"}; printk("Buffer memory: %6dkB\n",buffermem>>10); printk("Buffer heads: %6d\n",nr_buffer_heads); printk("Buffer blocks: %6d\n",nr_buffers); for(nlist = 0; nlist < NR_LIST; nlist++) { - shared = found = locked = dirty = used = lastused = protected = 0; + found = locked = dirty = used = lastused = protected = 0; bh = lru_list[nlist]; if(!bh) continue; + do { found++; if (buffer_locked(bh)) @@ -1677,260 +1497,42 @@ void show_buffers(void) protected++; if (buffer_dirty(bh)) dirty++; - if (mem_map[MAP_NR(((unsigned long) bh->b_data))].count != 1) - shared++; if (bh->b_count) used++, lastused = found; bh = bh->b_next_free; } while (bh != lru_list[nlist]); - printk("Buffer[%d] mem: %d buffers, %d used (last=%d), " - "%d locked, %d protected, %d dirty %d shrd\n", - nlist, found, used, lastused, - locked, protected, dirty, shared); - }; - printk("Size [LAV] Free Clean Unshar Lck Lck1 Dirty Shared \n"); - for(isize = 0; isize<NR_SIZES; isize++){ - printk("%5d [%5d]: %7d ", bufferindex_size[isize], - buffers_lav[isize], nr_free[isize]); - for(nlist = 0; nlist < NR_LIST; nlist++) - printk("%7d ", nr_buffers_st[isize][nlist]); - printk("\n"); - } -} - - -/* ====================== Cluster patches for ext2 ==================== */ - -/* - * try_to_reassign() checks if all the buffers on this particular page - * are unused, and reassign to a new cluster them if this is true. 
- */ -static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp, - kdev_t dev, unsigned int starting_block) -{ - unsigned long page; - struct buffer_head * tmp, * p; - - *bhp = bh; - page = (unsigned long) bh->b_data; - page &= PAGE_MASK; - if(mem_map[MAP_NR(page)].count != 1) return 0; - tmp = bh; - do { - if (!tmp) - return 0; - - if (tmp->b_count || buffer_protected(tmp) || - buffer_dirty(tmp) || buffer_locked(tmp)) - return 0; - tmp = tmp->b_this_page; - } while (tmp != bh); - tmp = bh; - - while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) - tmp = tmp->b_this_page; - - /* This is the buffer at the head of the page */ - bh = tmp; - do { - p = tmp; - tmp = tmp->b_this_page; - remove_from_queues(p); - p->b_dev = dev; - mark_buffer_uptodate(p, 0); - clear_bit(BH_Req, &p->b_state); - p->b_blocknr = starting_block++; - insert_into_queues(p); - } while (tmp != bh); - return 1; -} - -/* - * Try to find a free cluster by locating a page where - * all of the buffers are unused. We would like this function - * to be atomic, so we do not call anything that might cause - * the process to sleep. The priority is somewhat similar to - * the priority used in shrink_buffers. - * - * My thinking is that the kernel should end up using whole - * pages for the buffer cache as much of the time as possible. - * This way the other buffers on a particular page are likely - * to be very near each other on the free list, and we will not - * be expiring data prematurely. For now we only cannibalize buffers - * of the same size to keep the code simpler. - */ -static int reassign_cluster(kdev_t dev, - unsigned int starting_block, int size) -{ - struct buffer_head *bh; - int isize = BUFSIZE_INDEX(size); - int i; - - /* We want to give ourselves a really good shot at generating - a cluster, and since we only take buffers from the free - list, we "overfill" it a little. 
*/ - - while(nr_free[isize] < 32) refill_freelist(size); - - bh = free_list[isize]; - if(bh) - for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) { - if (!bh->b_this_page) continue; - if (try_to_reassign(bh, &bh, dev, starting_block)) - return 4; - } - return 0; -} - -/* This function tries to generate a new cluster of buffers - * from a new page in memory. We should only do this if we have - * not expanded the buffer cache to the maximum size that we allow. - */ -static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size) -{ - struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE]; - int isize = BUFSIZE_INDEX(size); - unsigned long offset; - unsigned long page; - int nblock; - - page = get_free_page(GFP_NOBUFFER); - if(!page) return 0; - - bh = create_buffers(page, size); - if (!bh) { - free_page(page); - return 0; - }; - nblock = block; - for (offset = 0 ; offset < PAGE_SIZE ; offset += size) { - if (find_buffer(dev, nblock++, size)) - goto not_aligned; - } - tmp = bh; - nblock = 0; - while (1) { - arr[nblock++] = bh; - bh->b_count = 1; - bh->b_flushtime = 0; - bh->b_state = 0; - bh->b_dev = dev; - bh->b_list = BUF_CLEAN; - bh->b_blocknr = block++; - nr_buffers++; - nr_buffers_size[isize]++; - insert_into_queues(bh); - if (bh->b_this_page) - bh = bh->b_this_page; - else - break; - } - buffermem += PAGE_SIZE; - mem_map[MAP_NR(page)].buffers = bh; - bh->b_this_page = tmp; - while (nblock-- > 0) - brelse(arr[nblock]); - return 4; /* ?? 
*/ -not_aligned: - while ((tmp = bh) != NULL) { - bh = bh->b_this_page; - put_unused_buffer_head(tmp); - } - free_page(page); - return 0; -} - -unsigned long generate_cluster(kdev_t dev, int b[], int size) -{ - int i, offset; - - for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) { - if(i && b[i]-1 != b[i-1]) return 0; /* No need to cluster */ - if(find_buffer(dev, b[i], size)) return 0; + printk("%8s: %d buffers, %d used (last=%d), " + "%d locked, %d protected, %d dirty\n", + buf_types[nlist], found, used, lastused, + locked, protected, dirty); }; - - /* OK, we have a candidate for a new cluster */ - - /* See if one size of buffer is over-represented in the buffer cache, - if so reduce the numbers of buffers */ - if(maybe_shrink_lav_buffers(size)) - { - int retval; - retval = try_to_generate_cluster(dev, b[0], size); - if(retval) return retval; - }; - - if (nr_free_pages > min_free_pages*2) - return try_to_generate_cluster(dev, b[0], size); - else - return reassign_cluster(dev, b[0], size); } -unsigned long generate_cluster_swab32(kdev_t dev, int b[], int size) -{ - int i, offset; - - for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) { - if(i && le32_to_cpu(b[i])-1 != - le32_to_cpu(b[i-1])) return 0; /* No need to cluster */ - if(find_buffer(dev, le32_to_cpu(b[i]), size)) return 0; - }; - - /* OK, we have a candidate for a new cluster */ - - /* See if one size of buffer is over-represented in the buffer cache, - if so reduce the numbers of buffers */ - if(maybe_shrink_lav_buffers(size)) - { - int retval; - retval = try_to_generate_cluster(dev, le32_to_cpu(b[0]), size); - if(retval) return retval; - }; - - if (nr_free_pages > min_free_pages*2) - return try_to_generate_cluster(dev, le32_to_cpu(b[0]), size); - else - return reassign_cluster(dev, le32_to_cpu(b[0]), size); -} /* ===================== Init ======================= */ /* - * This initializes the initial buffer free list. 
nr_buffers_type is set - * to one less the actual number of buffers, as a sop to backwards - * compatibility --- the old code did this (I think unintentionally, - * but I'm not sure), and programs in the ps package expect it. - * - TYT 8/30/92 + * allocate the hash table and init the free list + * Use gfp() for the hash table to decrease TLB misses, use + * SLAB cache for buffer heads. */ void buffer_init(void) { - int i; - int isize = BUFSIZE_INDEX(BLOCK_SIZE); - long memsize = max_mapnr << PAGE_SHIFT; - - if (memsize >= 64*1024*1024) - nr_hash = 65521; - else if (memsize >= 32*1024*1024) - nr_hash = 32749; - else if (memsize >= 16*1024*1024) - nr_hash = 16381; - else if (memsize >= 8*1024*1024) - nr_hash = 8191; - else if (memsize >= 4*1024*1024) - nr_hash = 4093; - else nr_hash = 997; - - hash_table = (struct buffer_head **) vmalloc(nr_hash * - sizeof(struct buffer_head *)); - + hash_table = (struct buffer_head **) + __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER, 0); + if (!hash_table) + panic("Failed to allocate buffer hash table\n"); + memset(hash_table,0,NR_HASH*sizeof(struct buffer_head *)); + + bh_cachep = kmem_cache_create("buffer_head", + sizeof(struct buffer_head), + sizeof(unsigned long) * 4, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!bh_cachep) + panic("Cannot create buffer head SLAB cache\n"); - for (i = 0 ; i < nr_hash ; i++) - hash_table[i] = NULL; lru_list[BUF_CLEAN] = 0; grow_buffers(GFP_KERNEL, BLOCK_SIZE); - if (!free_list[isize]) - panic("VFS: Unable to initialize buffer free list!"); - return; } @@ -1966,7 +1568,7 @@ static void wakeup_bdflush(int wait) asmlinkage int sync_old_buffers(void) { - int i, isize; + int i; int ndirty, nwritten; int nlist; int ncount; @@ -1985,6 +1587,7 @@ asmlinkage int sync_old_buffers(void) ndirty = 0; nwritten = 0; repeat: + bh = lru_list[nlist]; if(bh) for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) { @@ -2022,13 +1625,6 @@ asmlinkage int sync_old_buffers(void) printk("Wrote %d/%d buffers\n", nwritten, 
ndirty); #endif - /* We assume that we only come through here on a regular - schedule, like every 5 seconds. Now update load averages. - Shift usage counts to prevent overflow. */ - for(isize = 0; isize<NR_SIZES; isize++){ - CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]); - buffer_usage[isize] = 0; - } return 0; } @@ -2040,37 +1636,42 @@ asmlinkage int sync_old_buffers(void) asmlinkage int sys_bdflush(int func, long data) { - int i, error; + int i, error = -EPERM; + lock_kernel(); if (!suser()) - return -EPERM; + goto out; - if (func == 1) - return sync_old_buffers(); + if (func == 1) { + error = sync_old_buffers(); + goto out; + } /* Basically func 1 means read param 1, 2 means write param 1, etc */ if (func >= 2) { i = (func-2) >> 1; + error = -EINVAL; if (i < 0 || i >= N_PARAM) - return -EINVAL; + goto out; if((func & 1) == 0) { - error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int)); - if (error) - return error; - put_user(bdf_prm.data[i], (int*)data); - return 0; - }; + error = put_user(bdf_prm.data[i], (int*)data); + goto out; + } if (data < bdflush_min[i] || data > bdflush_max[i]) - return -EINVAL; + goto out; bdf_prm.data[i] = data; - return 0; + error = 0; + goto out; }; /* Having func 0 used to launch the actual bdflush and then never - return (unless explicitly killed). We return zero here to - remain semi-compatible with present update(8) programs. */ - - return 0; + * return (unless explicitly killed). We return zero here to + * remain semi-compatible with present update(8) programs. + */ + error = 0; +out: + unlock_kernel(); + return error; } /* This is the actual bdflush daemon itself. It used to be started from @@ -2111,11 +1712,7 @@ int bdflush(void * unused) * and other internals and thus be subject to the SMP locking * rules. (On a uniprocessor box this does nothing). 
*/ - -#ifdef __SMP__ lock_kernel(); - syscall_count++; -#endif for (;;) { #ifdef DEBUG @@ -2132,6 +1729,7 @@ int bdflush(void * unused) ndirty = 0; refilled = 0; repeat: + bh = lru_list[nlist]; if(bh) for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; @@ -2192,29 +1790,9 @@ int bdflush(void * unused) /* If there are still a lot of dirty buffers around, skip the sleep and flush some more */ - - if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * - bdf_prm.b_un.nfract/100) { + if(nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) { current->signal = 0; interruptible_sleep_on(&bdflush_wait); } } } - - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-indent-level: 8 - * c-brace-imaginary-offset: 0 - * c-brace-offset: -8 - * c-argdecl-indent: 8 - * c-label-offset: -8 - * c-continued-statement-offset: 8 - * c-continued-brace-offset: 0 - * End: - */ diff --git a/fs/dcache.c b/fs/dcache.c index 809c21528..2dc317aad 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -4,6 +4,8 @@ * (C) Copyright 1994 Linus Torvalds */ +/* Speeded up searches a bit and threaded the mess. -DaveM */ + /* * The directory cache is a "two-level" cache, each level doing LRU on * its entries. Adding new entries puts them at the end of the LRU @@ -21,24 +23,26 @@ #include <linux/fs.h> #include <linux/string.h> +#include <asm/unaligned.h> +#include <asm/spinlock.h> + +spinlock_t dcache_lock = SPIN_LOCK_UNLOCKED; + /* * Don't bother caching long names.. They just take up space in the cache, and * for a name cache you just want to cache the "normal" names anyway which tend * to be short. 
*/ #define DCACHE_NAME_LEN 15 -#define DCACHE_SIZE 128 - -struct hash_list { - struct dir_cache_entry * next; - struct dir_cache_entry * prev; -}; +#define DCACHE_SIZE 1024 +#define DCACHE_HASH_QUEUES 256 /* keep this a pow2 */ /* * The dir_cache_entry must be in this order: we do ugly things with the pointers */ struct dir_cache_entry { - struct hash_list h; + struct dir_cache_entry *next; + struct dir_cache_entry **pprev; kdev_t dc_dev; unsigned long dir; unsigned long version; @@ -65,14 +69,34 @@ static struct dir_cache_entry level2_cache[DCACHE_SIZE]; static struct dir_cache_entry * level1_head; static struct dir_cache_entry * level2_head; +/* The hash queues are layed out in a slightly different manner. */ +static struct dir_cache_entry *hash_table[DCACHE_HASH_QUEUES]; + +#define hash_fn(dev,dir,namehash) \ + ((HASHDEV(dev) ^ (dir) ^ (namehash)) & (DCACHE_HASH_QUEUES - 1)) + /* - * The hash-queues are also doubly-linked circular lists, but the head is - * itself on the doubly-linked list, not just a pointer to the first entry. + * Stupid name"hash" algorithm. Write something better if you want to, + * but I doubt it matters that much. 
*/ -#define DCACHE_HASH_QUEUES 32 -#define hash_fn(dev,dir,namehash) ((HASHDEV(dev) ^ (dir) ^ (namehash)) % DCACHE_HASH_QUEUES) +static unsigned long namehash(const char * name, int len) +{ + unsigned long hash = 0; -static struct hash_list hash_table[DCACHE_HASH_QUEUES]; + while ((len -= sizeof(unsigned long)) > 0) { + hash += get_unaligned((unsigned long *)name); + name += sizeof(unsigned long); + } + return hash + + (get_unaligned((unsigned long *)name) & + ~(~0UL << ((len + sizeof(unsigned long)) << 3))); +} + +static inline struct dir_cache_entry **get_hlist(struct inode *dir, + const char *name, int len) +{ + return hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name, len)); +} static inline void remove_lru(struct dir_cache_entry * de) { @@ -104,68 +128,50 @@ static inline void update_lru(struct dir_cache_entry * de) } /* - * Stupid name"hash" algorithm. Write something better if you want to, - * but I doubt it matters that much - */ -static inline unsigned long namehash(const char * name, int len) -{ - return len + - ((const unsigned char *) name)[0]+ - ((const unsigned char *) name)[len-1]; -} - -/* * Hash queue manipulation. Look out for the casts.. + * + * What casts? 
8-) -DaveM */ static inline void remove_hash(struct dir_cache_entry * de) { - struct dir_cache_entry * next = de->h.next; - - if (next) { - struct dir_cache_entry * prev = de->h.prev; - next->h.prev = prev; - prev->h.next = next; - de->h.next = NULL; + if(de->pprev) { + if(de->next) + de->next->pprev = de->pprev; + *de->pprev = de->next; + de->pprev = NULL; } } -static inline void add_hash(struct dir_cache_entry * de, struct hash_list * hash) +static inline void add_hash(struct dir_cache_entry * de, struct dir_cache_entry ** hash) { - struct dir_cache_entry * next = hash->next; - de->h.next = next; - de->h.prev = (struct dir_cache_entry *) hash; - next->h.prev = de; - hash->next = de; + if((de->next = *hash) != NULL) + (*hash)->pprev = &de->next; + *hash = de; + de->pprev = hash; } /* * Find a directory cache entry given all the necessary info. */ -static inline struct dir_cache_entry * find_entry(struct inode * dir, const char * name, int len, struct hash_list * hash) +static inline struct dir_cache_entry * find_entry(struct inode * dir, const char * name, int len, struct dir_cache_entry ** hash) { - struct dir_cache_entry * de = hash->next; - - for (de = hash->next ; de != (struct dir_cache_entry *) hash ; de = de->h.next) { - if (de->dc_dev != dir->i_dev) - continue; - if (de->dir != dir->i_ino) - continue; - if (de->version != dir->i_version) - continue; - if (de->name_len != len) - continue; - if (memcmp(de->name, name, len)) - continue; - return de; - } - return NULL; + struct dir_cache_entry *de; + + for(de = *hash; de; de = de->next) + if((de->name_len == (unsigned char) len) && + (de->dc_dev == dir->i_dev) && + (de->dir == dir->i_ino) && + (de->version == dir->i_version) && + (!memcmp(de->name, name, len))) + break; + return de; } /* * Move a successfully used entry to level2. If already at level2, * move it to the end of the LRU queue.. 
*/ -static inline void move_to_level2(struct dir_cache_entry * old_de, struct hash_list * hash) +static inline void move_to_level2(struct dir_cache_entry * old_de, struct dir_cache_entry ** hash) { struct dir_cache_entry * de; @@ -182,43 +188,49 @@ static inline void move_to_level2(struct dir_cache_entry * old_de, struct hash_l int dcache_lookup(struct inode * dir, const char * name, int len, unsigned long * ino) { - struct hash_list * hash; - struct dir_cache_entry *de; - - if (len > DCACHE_NAME_LEN) - return 0; - hash = hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name,len)); - de = find_entry(dir, name, len, hash); - if (!de) - return 0; - *ino = de->ino; - move_to_level2(de, hash); - return 1; + int ret = 0; + + if(len <= DCACHE_NAME_LEN) { + struct dir_cache_entry **hash = get_hlist(dir, name, len); + struct dir_cache_entry *de; + + spin_lock(&dcache_lock); + de = find_entry(dir, name, len, hash); + if(de) { + *ino = de->ino; + move_to_level2(de, hash); + ret = 1; + } + spin_unlock(&dcache_lock); + } + return ret; } void dcache_add(struct inode * dir, const char * name, int len, unsigned long ino) { - struct hash_list * hash; - struct dir_cache_entry *de; - - if (len > DCACHE_NAME_LEN) - return; - hash = hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name,len)); - if ((de = find_entry(dir, name, len, hash)) != NULL) { - de->ino = ino; - update_lru(de); - return; + if (len <= DCACHE_NAME_LEN) { + struct dir_cache_entry **hash = get_hlist(dir, name, len); + struct dir_cache_entry *de; + + spin_lock(&dcache_lock); + de = find_entry(dir, name, len, hash); + if (de) { + de->ino = ino; + update_lru(de); + } else { + de = level1_head; + level1_head = de->next_lru; + remove_hash(de); + de->dc_dev = dir->i_dev; + de->dir = dir->i_ino; + de->version = dir->i_version; + de->ino = ino; + de->name_len = len; + memcpy(de->name, name, len); + add_hash(de, hash); + } + spin_unlock(&dcache_lock); } - de = level1_head; - level1_head = de->next_lru; - 
remove_hash(de); - de->dc_dev = dir->i_dev; - de->dir = dir->i_ino; - de->version = dir->i_version; - de->ino = ino; - de->name_len = len; - memcpy(de->name, name, len); - add_hash(de, hash); } unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end) @@ -258,7 +270,7 @@ unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end) * Empty hash queues.. */ for (i = 0 ; i < DCACHE_HASH_QUEUES ; i++) - hash_table[i].next = hash_table[i].next = - (struct dir_cache_entry *) &hash_table[i]; + hash_table[i] = NULL; + return mem_start; } diff --git a/fs/devices.c b/fs/devices.c index 94e5cad61..6ea9880ba 100644 --- a/fs/devices.c +++ b/fs/devices.c @@ -13,7 +13,6 @@ #include <linux/major.h> #include <linux/string.h> #include <linux/sched.h> -#include <linux/ext_fs.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/errno.h> @@ -236,11 +235,12 @@ int blkdev_open(struct inode * inode, struct file * filp) return ret; } -void blkdev_release(struct inode * inode) +int blkdev_release(struct inode * inode) { struct file_operations *fops = get_blkfops(MAJOR(inode->i_rdev)); if (fops && fops->release) - fops->release(inode,NULL); + return fops->release(inode,NULL); + return 0; } @@ -254,7 +254,7 @@ struct file_operations def_blk_fops = { NULL, /* read */ NULL, /* write */ NULL, /* readdir */ - NULL, /* select */ + NULL, /* poll */ NULL, /* ioctl */ NULL, /* mmap */ blkdev_open, /* open */ @@ -307,7 +307,7 @@ struct file_operations def_chr_fops = { NULL, /* read */ NULL, /* write */ NULL, /* readdir */ - NULL, /* select */ + NULL, /* poll */ NULL, /* ioctl */ NULL, /* mmap */ chrdev_open, /* open */ diff --git a/fs/dquot.c b/fs/dquot.c index 2c8778995..5236ed38e 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -13,12 +13,13 @@ * diskquota system. This implementation is not based on any BSD * kernel sourcecode. 
* - * Version: $Id: dquot.c,v 5.6 1995/11/15 20:30:27 mvw Exp mvw $ + * Version: $Id: dquot.c,v 1.11 1997/01/06 06:53:02 davem Exp $ * * Author: Marco van Wieringen <mvw@mcs.ow.nl> <mvw@tnix.net> * * Fixes: Dmitry Gorodchanin <begemot@bgm.rosprint.net>, 11 Feb 96 * removed race conditions in dqput(), dqget() and iput(). + * Andi Kleen removed all verify_area() calls, 31 Dec 96 * * (C) Copyright 1994, 1995 Marco van Wieringen * @@ -34,6 +35,8 @@ #include <linux/tty.h> #include <linux/malloc.h> #include <linux/mount.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -585,15 +588,13 @@ static int set_dqblk(kdev_t dev, int id, short type, int flags, struct dqblk *dq { struct dquot *dquot; struct dqblk dq_dqblk; - int error; if (dqblk == (struct dqblk *)NULL) return(-EFAULT); if (flags & QUOTA_SYSCALL) { - if ((error = verify_area(VERIFY_READ, dqblk, sizeof(struct dqblk))) != 0) - return(error); - copy_from_user(&dq_dqblk, dqblk, sizeof(struct dqblk)); + if (copy_from_user(&dq_dqblk, dqblk, sizeof(struct dqblk))) + return -EFAULT; } else { memcpy(&dq_dqblk, dqblk, sizeof(struct dqblk)); } @@ -649,13 +650,10 @@ static int get_quota(kdev_t dev, int id, short type, struct dqblk *dqblk) if (dqblk == (struct dqblk *)NULL) return(-EFAULT); - if ((error = verify_area(VERIFY_WRITE, dqblk, sizeof(struct dqblk))) != 0) - return(error); - if ((dquot = dqget(dev, id, type)) != NODQUOT) { - copy_to_user(dqblk, (char *)&dquot->dq_dqb, sizeof(struct dqblk)); + error = copy_to_user(dqblk, (char *)&dquot->dq_dqb, sizeof(struct dqblk)); dqput(dquot); - return(0); + return error ? 
-EFAULT : 0; } } return(-ESRCH); @@ -663,15 +661,10 @@ static int get_quota(kdev_t dev, int id, short type, struct dqblk *dqblk) static int get_stats(caddr_t addr) { - int error; - - if ((error = verify_area(VERIFY_WRITE, addr, sizeof(struct dqstats))) != 0) - return(error); - dqstats.allocated_dquots = nr_dquots; dqstats.free_dquots = nr_free_dquots; - copy_to_user(addr, (caddr_t)&dqstats, sizeof(struct dqstats)); - return(0); + return copy_to_user(addr, (caddr_t)&dqstats, sizeof(struct dqstats)) + ? -EFAULT : 0; } /* @@ -1014,12 +1007,15 @@ asmlinkage int sys_quotactl(int cmd, const char *special, int id, caddr_t addr) int cmds = 0, type = 0, flags = 0; struct inode *ino; kdev_t dev; + int ret = -EINVAL; + lock_kernel(); cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; if ((u_int) type >= MAXQUOTAS) - return(-EINVAL); + goto out; + ret = -EPERM; switch (cmds) { case Q_SYNC: case Q_GETSTATS: @@ -1027,33 +1023,39 @@ asmlinkage int sys_quotactl(int cmd, const char *special, int id, caddr_t addr) case Q_GETQUOTA: if (((type == USRQUOTA && current->uid != id) || (type == GRPQUOTA && current->gid != id)) && !fsuser()) - return(-EPERM); + goto out; break; default: if (!fsuser()) - return(-EPERM); + goto out; } + ret = -EINVAL; if (special == (char *)NULL && (cmds == Q_SYNC || cmds == Q_GETSTATS)) dev = 0; else { if (namei(special, &ino)) - return(-EINVAL); + goto out; dev = ino->i_rdev; + ret = -ENOTBLK; if (!S_ISBLK(ino->i_mode)) { iput(ino); - return(-ENOTBLK); + goto out; } iput(ino); } + ret = -EINVAL; switch (cmds) { case Q_QUOTAON: - return(quota_on(dev, type, (char *) addr)); + ret = quota_on(dev, type, (char *) addr); + goto out; case Q_QUOTAOFF: - return(quota_off(dev, type)); + ret = quota_off(dev, type); + goto out; case Q_GETQUOTA: - return(get_quota(dev, id, type, (struct dqblk *) addr)); + ret = get_quota(dev, id, type, (struct dqblk *) addr); + goto out; case Q_SETQUOTA: flags |= SET_QUOTA; break; @@ -1064,15 +1066,20 @@ asmlinkage int sys_quotactl(int 
cmd, const char *special, int id, caddr_t addr) flags |= SET_QLIMIT; break; case Q_SYNC: - return(sync_dquots(dev, type)); + ret = sync_dquots(dev, type); + goto out; case Q_GETSTATS: - return(get_stats(addr)); + ret = get_stats(addr); default: - return(-EINVAL); + goto out; } flags |= QUOTA_SYSCALL; if (has_quota_enabled(dev, type)) - return(set_dqblk(dev, id, type, flags, (struct dqblk *) addr)); - return(-ESRCH); + ret = set_dqblk(dev, id, type, flags, (struct dqblk *) addr); + else + ret = -ESRCH; +out: + unlock_kernel(); + return ret; } @@ -26,6 +26,7 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/mman.h> #include <linux/a.out.h> #include <linux/errno.h> @@ -38,10 +39,14 @@ #include <linux/malloc.h> #include <linux/binfmts.h> #include <linux/personality.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgtable.h> +#include <asm/mmu_context.h> #include <linux/config.h> #ifdef CONFIG_KERNELD @@ -61,7 +66,7 @@ asmlinkage int sys_brk(unsigned long); static struct linux_binfmt *formats = (struct linux_binfmt *) NULL; -void binfmt_setup(void) +__initfunc(void binfmt_setup(void)) { #ifdef CONFIG_BINFMT_ELF init_elf_binfmt(); @@ -71,6 +76,10 @@ void binfmt_setup(void) init_irix_binfmt(); #endif +#ifdef CONFIG_BINFMT_ELF32 + init_elf32_binfmt(); +#endif + #ifdef CONFIG_BINFMT_AOUT init_aout_binfmt(); #endif @@ -78,6 +87,11 @@ void binfmt_setup(void) #ifdef CONFIG_BINFMT_JAVA init_java_binfmt(); #endif + +#ifdef CONFIG_BINFMT_EM86 + init_em86_binfmt(); +#endif + /* This cannot be configured out of the kernel */ init_script_binfmt(); } @@ -161,9 +175,11 @@ asmlinkage int sys_uselib(const char * library) struct file * file; struct linux_binfmt * fmt; + lock_kernel(); fd = sys_open(library, 0, 0); + retval = fd; if (fd < 0) - return fd; + goto out; file = current->files->fd[fd]; retval = -ENOEXEC; if (file && 
file->f_inode && file->f_op && file->f_op->read) { @@ -177,6 +193,8 @@ asmlinkage int sys_uselib(const char * library) } } sys_close(fd); +out: + unlock_kernel(); return retval; } @@ -289,7 +307,7 @@ unsigned long setup_arg_pages(unsigned long p, struct linux_binprm * bprm) bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (mpnt) { mpnt->vm_mm = current->mm; mpnt->vm_start = PAGE_MASK & (unsigned long) p; @@ -377,6 +395,7 @@ static void exec_mmap(void) return; } *mm = *current->mm; + init_new_context(mm); mm->def_flags = 0; /* should future lockings be kept? */ mm->count = 1; mm->mmap = NULL; @@ -388,8 +407,10 @@ static void exec_mmap(void) new_page_tables(current); return; } + flush_cache_mm(current->mm); exit_mmap(current->mm); clear_page_tables(current); + flush_tlb_mm(current->mm); } /* @@ -403,7 +424,7 @@ static inline void flush_old_signals(struct signal_struct *sig) struct sigaction * sa = sig->action; for (i=32 ; i != 0 ; i--) { - u_sigemptyset(&sa->sa_mask); + sa->sa_mask = 0; sa->sa_flags = 0; if (sa->sa_handler != SIG_IGN) sa->sa_handler = NULL; @@ -517,7 +538,7 @@ int prepare_binprm(struct linux_binprm *bprm) if (IS_NOSUID(bprm->inode) || (current->flags & PF_PTRACED) || (current->fs->count > 1) - || (current->sig->count > 1) + || (atomic_read(&current->sig->count) > 1) || (current->files->count > 1)) { if (!suser()) return -EPERM; @@ -607,7 +628,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) printable(bprm->buf[2]) && printable(bprm->buf[3])) break; /* -ENOEXEC */ - sprintf(modname, "binfmt-%hd", *(short*)(&bprm->buf)); + sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); request_module(modname); #endif } diff --git a/fs/ext/dir.c b/fs/ext/dir.c deleted file mode 100644 index c6b04d26c..000000000 --- a/fs/ext/dir.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * linux/fs/ext/dir.c - * -
* Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * - * from - * - * linux/fs/minix/dir.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext directory handling functions - */ - -#include <asm/uaccess.h> - -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/ext_fs.h> -#include <linux/stat.h> - -static long ext_dir_read(struct inode * inode, struct file * filp, - char * buf, unsigned long count) -{ - return -EISDIR; -} - -static int ext_readdir(struct inode *, struct file *, void *, filldir_t); - -static struct file_operations ext_dir_operations = { - NULL, /* lseek - default */ - ext_dir_read, /* read */ - NULL, /* write - bad */ - ext_readdir, /* readdir */ - NULL, /* select - default */ - NULL, /* ioctl - default */ - NULL, /* mmap */ - NULL, /* no special open code */ - NULL, /* no special release code */ - file_fsync /* fsync */ -}; - -/* - * directories can handle most operations... - */ -struct inode_operations ext_dir_inode_operations = { - &ext_dir_operations, /* default directory file-ops */ - ext_create, /* create */ - ext_lookup, /* lookup */ - ext_link, /* link */ - ext_unlink, /* unlink */ - ext_symlink, /* symlink */ - ext_mkdir, /* mkdir */ - ext_rmdir, /* rmdir */ - ext_mknod, /* mknod */ - ext_rename, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - ext_truncate, /* truncate */ - NULL /* permission */ -}; - -static int ext_readdir(struct inode * inode, struct file * filp, - void * dirent, filldir_t filldir) -{ - int error; - unsigned int i; - off_t offset; - struct buffer_head * bh; - struct ext_dir_entry * de; - - if (!inode || !S_ISDIR(inode->i_mode)) - return -EBADF; - if ((filp->f_pos & 7) != 0) - return -EBADF; - error = 0; - while (!error && filp->f_pos < inode->i_size) { - offset = filp->f_pos & 1023; - bh = ext_bread(inode,(filp->f_pos)>>BLOCK_SIZE_BITS,0); - if (!bh) { - filp->f_pos += 1024-offset; - continue; 
- } - for (i = 0; i < 1024 && i < offset; ) { - de = (struct ext_dir_entry *) (bh->b_data + i); - if (!de->rec_len) - break; - i += de->rec_len; - } - offset = i; - de = (struct ext_dir_entry *) (offset + bh->b_data); - while (offset < 1024 && filp->f_pos < inode->i_size) { - if (de->rec_len < 8 || de->rec_len % 8 != 0 || - de->rec_len < de->name_len + 8 || - (de->rec_len + (off_t) filp->f_pos - 1) / 1024 > ((off_t) filp->f_pos / 1024)) { - printk ("ext_readdir: bad dir entry, skipping\n"); - printk ("dev=%s, dir=%ld, " - "offset=%ld, rec_len=%d, name_len=%d\n", - kdevname(inode->i_dev), inode->i_ino, - offset, de->rec_len, de->name_len); - filp->f_pos += 1024-offset; - if (filp->f_pos > inode->i_size) - filp->f_pos = inode->i_size; - continue; - } - if (de->inode) { - error = filldir(dirent, de->name, de->name_len, filp->f_pos, de->inode); - if (error) - break; - } - offset += de->rec_len; - filp->f_pos += de->rec_len; - ((char *) de) += de->rec_len; - } - brelse(bh); - } - return 0; -} diff --git a/fs/ext/file.c b/fs/ext/file.c deleted file mode 100644 index 6e298aa60..000000000 --- a/fs/ext/file.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * linux/fs/ext/file.c - * - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * - * from - * - * linux/fs/minix/file.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext regular file handling primitives - */ - -#include <asm/uaccess.h> -#include <asm/system.h> - -#include <linux/sched.h> -#include <linux/ext_fs.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/locks.h> -#include <linux/pagemap.h> - -#define NBUF 32 - -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) - -#include <linux/fs.h> -#include <linux/ext_fs.h> - -static long ext_file_read(struct inode *, struct file *, char *, unsigned long); -static long ext_file_write(struct inode *, struct file *, const char *, unsigned long); - -/* - * We have mostly NULL's 
here: the current defaults are ok for - * the ext filesystem. - */ -static struct file_operations ext_file_operations = { - NULL, /* lseek - default */ - ext_file_read, /* read */ - ext_file_write, /* write */ - NULL, /* readdir - bad */ - NULL, /* select - default */ - NULL, /* ioctl - default */ - generic_file_mmap, /* mmap */ - NULL, /* no special open is needed */ - NULL, /* release */ - ext_sync_file /* fsync */ -}; - -struct inode_operations ext_file_inode_operations = { - &ext_file_operations, /* default file operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - generic_readpage, /* readpage */ - NULL, /* writepage */ - ext_bmap, /* bmap */ - ext_truncate, /* truncate */ - NULL /* permission */ -}; - -static long ext_file_read(struct inode * inode, struct file * filp, - char * buf, unsigned long count) -{ - int read,left,chars; - int block, blocks, offset; - int bhrequest, uptodate; - struct buffer_head ** bhb, ** bhe; - struct buffer_head * bhreq[NBUF]; - struct buffer_head * buflist[NBUF]; - unsigned int size; - - if (!inode) { - printk("ext_file_read: inode = NULL\n"); - return -EINVAL; - } - if (!S_ISREG(inode->i_mode)) { - printk("ext_file_read: mode = %07o\n",inode->i_mode); - return -EINVAL; - } - offset = filp->f_pos; - size = inode->i_size; - if (offset > size) - left = 0; - else - left = size - offset; - if (left > count) - left = count; - if (left <= 0) - return 0; - read = 0; - block = offset >> BLOCK_SIZE_BITS; - offset &= BLOCK_SIZE-1; - size = (size + (BLOCK_SIZE-1)) >> BLOCK_SIZE_BITS; - blocks = (left + offset + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; - bhb = bhe = buflist; - if (filp->f_reada) { - if(blocks < read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9)) - blocks = read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9); - if (block + blocks > size) 
- blocks = size - block; - } - - /* We do this in a two stage process. We first try to request - as many blocks as we can, then we wait for the first one to - complete, and then we try to wrap up as many as are actually - done. This routine is rather generic, in that it can be used - in a filesystem by substituting the appropriate function in - for getblk. - - This routine is optimized to make maximum use of the various - buffers and caches. */ - - do { - bhrequest = 0; - uptodate = 1; - while (blocks) { - --blocks; - *bhb = ext_getblk(inode, block++, 0); - if (*bhb && !buffer_uptodate(*bhb)) { - uptodate = 0; - bhreq[bhrequest++] = *bhb; - } - - if (++bhb == &buflist[NBUF]) - bhb = buflist; - - /* If the block we have on hand is uptodate, go ahead - and complete processing. */ - if (uptodate) - break; - if (bhb == bhe) - break; - } - - /* Now request them all */ - if (bhrequest) - ll_rw_block(READ, bhrequest, bhreq); - - do { /* Finish off all I/O that has actually completed */ - if (*bhe) { - wait_on_buffer(*bhe); - if (!buffer_uptodate(*bhe)) { /* read error? 
*/ - brelse(*bhe); - if (++bhe == &buflist[NBUF]) - bhe = buflist; - left = 0; - break; - } - } - if (left < BLOCK_SIZE - offset) - chars = left; - else - chars = BLOCK_SIZE - offset; - filp->f_pos += chars; - left -= chars; - read += chars; - if (*bhe) { - copy_to_user(buf,offset+(*bhe)->b_data,chars); - brelse(*bhe); - buf += chars; - } else { - while (chars-->0) - put_user(0,buf++); - } - offset = 0; - if (++bhe == &buflist[NBUF]) - bhe = buflist; - } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe))); - } while (left > 0); - -/* Release the read-ahead blocks */ - while (bhe != bhb) { - brelse(*bhe); - if (++bhe == &buflist[NBUF]) - bhe = buflist; - }; - if (!read) - return -EIO; - filp->f_reada = 1; - if (!IS_RDONLY(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } - return read; -} - -static long ext_file_write(struct inode * inode, struct file * filp, - const char * buf, unsigned long count) -{ - off_t pos; - int written,c; - struct buffer_head * bh; - char * p; - - if (!inode) { - printk("ext_file_write: inode = NULL\n"); - return -EINVAL; - } - if (!S_ISREG(inode->i_mode)) { - printk("ext_file_write: mode = %07o\n",inode->i_mode); - return -EINVAL; - } -/* - * ok, append may not work when many processes are writing at the same time - * but so what. That way leads to madness anyway. 
- */ - if (filp->f_flags & O_APPEND) - pos = inode->i_size; - else - pos = filp->f_pos; - written = 0; - while (written<count) { - bh = ext_getblk(inode,pos/BLOCK_SIZE,1); - if (!bh) { - if (!written) - written = -ENOSPC; - break; - } - c = BLOCK_SIZE - (pos % BLOCK_SIZE); - if (c > count-written) - c = count-written; - if (c != BLOCK_SIZE && !buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - if (!written) - written = -EIO; - break; - } - } - p = (pos % BLOCK_SIZE) + bh->b_data; - copy_from_user(p,buf,c); - update_vm_cache(inode, pos, p, c); - pos += c; - if (pos > inode->i_size) { - inode->i_size = pos; - inode->i_dirt = 1; - } - written += c; - buf += c; - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 0); - brelse(bh); - } - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - filp->f_pos = pos; - inode->i_dirt = 1; - return written; -} diff --git a/fs/ext/freelists.c b/fs/ext/freelists.c deleted file mode 100644 index eacd45d85..000000000 --- a/fs/ext/freelists.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * linux/fs/ext/freelists.c - * - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * - */ - -/* freelists.c contains the code that handles the inode and block free lists */ - - -/* - - The free blocks are managed by a linked list. The super block contains the - number of the first free block. This block contains 254 numbers of other - free blocks and the number of the next block in the list. - - When an ext fs is mounted, the number of the first free block is stored - in s->u.ext_sb.s_firstfreeblocknumber and the block header is stored in - s->u.ext_sb.s_firstfreeblock. u.ext_sb.s_freeblockscount contains the count - of free blocks. - - The free inodes are also managed by a linked list in a similar way. The - super block contains the number of the first free inode. This inode contains - 14 numbers of other free inodes and the number of the next inode in the list. 
- - The number of the first free inode is stored in - s->u.ext_sb.s_firstfreeinodenumber and the header of the block containing - the inode is stored in s->u.ext_sb.s_firstfreeinodeblock. - u.ext_sb.s_freeinodescount contains the count of free inodes. - -*/ - -#include <linux/sched.h> -#include <linux/ext_fs.h> -#include <linux/stat.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/locks.h> - -void ext_free_block(struct super_block * sb, int block) -{ - struct buffer_head * bh; - struct ext_free_block * efb; - - if (!sb) { - printk("trying to free block on non-existent device\n"); - return; - } - lock_super (sb); - if (block < sb->u.ext_sb.s_firstdatazone || - block >= sb->u.ext_sb.s_nzones) { - printk("trying to free block not in datazone\n"); - return; - } - bh = get_hash_table(sb->s_dev, block, sb->s_blocksize); - if (bh) - mark_buffer_clean(bh); - brelse(bh); - if (sb->u.ext_sb.s_firstfreeblock) - efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; - if (!sb->u.ext_sb.s_firstfreeblock || efb->count == 254) { -#ifdef EXTFS_DEBUG -printk("ext_free_block: block full, skipping to %d\n", block); -#endif - if (sb->u.ext_sb.s_firstfreeblock) - brelse (sb->u.ext_sb.s_firstfreeblock); - if (!(sb->u.ext_sb.s_firstfreeblock = bread (sb->s_dev, - block, sb->s_blocksize))) - panic ("ext_free_block: unable to read block to free\n"); - efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; - efb->next = sb->u.ext_sb.s_firstfreeblocknumber; - efb->count = 0; - sb->u.ext_sb.s_firstfreeblocknumber = block; - } else { - efb->free[efb->count++] = block; - } - sb->u.ext_sb.s_freeblockscount ++; - sb->s_dirt = 1; - mark_buffer_dirty(sb->u.ext_sb.s_firstfreeblock, 1); - unlock_super (sb); - return; -} - -int ext_new_block(struct super_block * sb) -{ - struct buffer_head * bh; - struct ext_free_block * efb; - int j; - - if (!sb) { - printk("trying to get new block from non-existent device\n"); - return 0; - } - if 
(!sb->u.ext_sb.s_firstfreeblock) - return 0; - lock_super (sb); - efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; - if (efb->count) { - j = efb->free[--efb->count]; - mark_buffer_dirty(sb->u.ext_sb.s_firstfreeblock, 1); - } else { -#ifdef EXTFS_DEBUG -printk("ext_new_block: block empty, skipping to %d\n", efb->next); -#endif - j = sb->u.ext_sb.s_firstfreeblocknumber; - sb->u.ext_sb.s_firstfreeblocknumber = efb->next; - brelse (sb->u.ext_sb.s_firstfreeblock); - if (!sb->u.ext_sb.s_firstfreeblocknumber) { - sb->u.ext_sb.s_firstfreeblock = NULL; - } else { - if (!(sb->u.ext_sb.s_firstfreeblock = bread (sb->s_dev, - sb->u.ext_sb.s_firstfreeblocknumber, - sb->s_blocksize))) - panic ("ext_new_block: unable to read next free block\n"); - } - } - if (j < sb->u.ext_sb.s_firstdatazone || j > sb->u.ext_sb.s_nzones) { - printk ("ext_new_block: blk = %d\n", j); - printk("allocating block not in data zone\n"); - return 0; - } - sb->u.ext_sb.s_freeblockscount --; - sb->s_dirt = 1; - - if (!(bh=getblk(sb->s_dev, j, sb->s_blocksize))) { - printk("new_block: cannot get block"); - return 0; - } - memset(bh->b_data, 0, BLOCK_SIZE); - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 1); - brelse(bh); -#ifdef EXTFS_DEBUG -printk("ext_new_block: allocating block %d\n", j); -#endif - unlock_super (sb); - return j; -} - -unsigned long ext_count_free_blocks(struct super_block *sb) -{ -#ifdef EXTFS_DEBUG - struct buffer_head * bh; - struct ext_free_block * efb; - unsigned long count, block; - - lock_super (sb); - if (!sb->u.ext_sb.s_firstfreeblock) - count = 0; - else { - efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; - count = efb->count + 1; - block = efb->next; - while (block) { - if (!(bh = bread (sb->s_dev, block, sb->s_blocksize))) { - printk ("ext_count_free: error while reading free blocks list\n"); - block = 0; - } else { - efb = (struct ext_free_block *) bh->b_data; - count += efb->count + 1; - block = efb->next; - brelse (bh); - } 
- } - } -printk("ext_count_free_blocks: stored = %d, computed = %d\n", - sb->u.ext_sb.s_freeblockscount, count); - unlock_super (sb); - return count; -#else - return sb->u.ext_sb.s_freeblockscount; -#endif -} - -void ext_free_inode(struct inode * inode) -{ - struct buffer_head * bh; - struct ext_free_inode * efi; - struct super_block * sb; - unsigned long block; - unsigned long ino; - kdev_t dev; - - if (!inode) - return; - if (!inode->i_dev) { - printk("free_inode: inode has no device\n"); - return; - } - if (inode->i_count != 1) { - printk("free_inode: inode has count=%d\n",inode->i_count); - return; - } - if (inode->i_nlink) { - printk("free_inode: inode has nlink=%d\n",inode->i_nlink); - return; - } - if (!inode->i_sb) { - printk("free_inode: inode on non-existent device\n"); - return; - } - sb = inode->i_sb; - ino = inode->i_ino; - dev = inode->i_dev; - clear_inode(inode); - lock_super (sb); - if (ino < 1 || ino > sb->u.ext_sb.s_ninodes) { - printk("free_inode: inode 0 or non-existent inode\n"); - unlock_super (sb); - return; - } - if (sb->u.ext_sb.s_firstfreeinodeblock) - efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) + - (sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK; - if (!sb->u.ext_sb.s_firstfreeinodeblock || efi->count == 14) { -#ifdef EXTFS_DEBUG -printk("ext_free_inode: inode full, skipping to %d\n", ino); -#endif - if (sb->u.ext_sb.s_firstfreeinodeblock) - brelse (sb->u.ext_sb.s_firstfreeinodeblock); - block = 2 + (ino - 1) / EXT_INODES_PER_BLOCK; - if (!(bh = bread(dev, block, sb->s_blocksize))) - panic("ext_free_inode: unable to read inode block\n"); - efi = ((struct ext_free_inode *) bh->b_data) + - (ino - 1) % EXT_INODES_PER_BLOCK; - efi->next = sb->u.ext_sb.s_firstfreeinodenumber; - efi->count = 0; - sb->u.ext_sb.s_firstfreeinodenumber = ino; - sb->u.ext_sb.s_firstfreeinodeblock = bh; - } else { - efi->free[efi->count++] = ino; - } - sb->u.ext_sb.s_freeinodescount ++; - sb->s_dirt = 1; - 
mark_buffer_dirty(sb->u.ext_sb.s_firstfreeinodeblock, 1); - unlock_super (sb); -} - -struct inode * ext_new_inode(const struct inode * dir) -{ - struct super_block * sb; - struct inode * inode; - struct ext_free_inode * efi; - unsigned long block; - int j; - - if (!dir || !(inode=get_empty_inode())) - return NULL; - sb = dir->i_sb; - inode->i_sb = sb; - inode->i_flags = sb->s_flags; - if (!sb->u.ext_sb.s_firstfreeinodeblock) - return 0; - lock_super (sb); - efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) + - (sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK; - if (efi->count) { - j = efi->free[--efi->count]; - mark_buffer_dirty(sb->u.ext_sb.s_firstfreeinodeblock, 1); - } else { -#ifdef EXTFS_DEBUG -printk("ext_free_inode: inode empty, skipping to %d\n", efi->next); -#endif - j = sb->u.ext_sb.s_firstfreeinodenumber; - if (efi->next > sb->u.ext_sb.s_ninodes) { - printk ("efi->next = %ld\n", efi->next); - panic ("ext_new_inode: bad inode number in free list\n"); - } - sb->u.ext_sb.s_firstfreeinodenumber = efi->next; - block = 2 + (((unsigned long) efi->next) - 1) / EXT_INODES_PER_BLOCK; - brelse (sb->u.ext_sb.s_firstfreeinodeblock); - if (!sb->u.ext_sb.s_firstfreeinodenumber) { - sb->u.ext_sb.s_firstfreeinodeblock = NULL; - } else { - if (!(sb->u.ext_sb.s_firstfreeinodeblock = - bread(sb->s_dev, block, sb->s_blocksize))) - panic ("ext_new_inode: unable to read next free inode block\n"); - } - } - sb->u.ext_sb.s_freeinodescount --; - sb->s_dirt = 1; - inode->i_count = 1; - inode->i_nlink = 1; - inode->i_dev = sb->s_dev; - inode->i_uid = current->fsuid; - inode->i_gid = (dir->i_mode & S_ISGID) ? 
dir->i_gid : current->fsgid; - inode->i_dirt = 1; - inode->i_ino = j; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = NULL; - inode->i_blocks = inode->i_blksize = 0; - insert_inode_hash(inode); -#ifdef EXTFS_DEBUG -printk("ext_new_inode : allocating inode %d\n", inode->i_ino); -#endif - unlock_super (sb); - return inode; -} - -unsigned long ext_count_free_inodes(struct super_block *sb) -{ -#ifdef EXTFS_DEBUG - struct buffer_head * bh; - struct ext_free_inode * efi; - unsigned long count, block, ino; - - lock_super (sb); - if (!sb->u.ext_sb.s_firstfreeinodeblock) - count = 0; - else { - efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) + - ((sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK); - count = efi->count + 1; - ino = efi->next; - while (ino) { - if (ino < 1 || ino > sb->u.ext_sb.s_ninodes) { - printk ("u.ext_sb.s_firstfreeinodenumber = %d, ino = %d\n", - (int) sb->u.ext_sb.s_firstfreeinodenumber,ino); - panic ("ext_count_fre_inodes: bad inode number in free list\n"); - } - block = 2 + ((ino - 1) / EXT_INODES_PER_BLOCK); - if (!(bh = bread (sb->s_dev, block, sb->s_blocksize))) { - printk ("ext_count_free_inodes: error while reading free inodes list\n"); - block = 0; - } else { - efi = ((struct ext_free_inode *) bh->b_data) + - ((ino - 1) % EXT_INODES_PER_BLOCK); - count += efi->count + 1; - ino = efi->next; - brelse (bh); - } - } - } -printk("ext_count_free_inodes: stored = %d, computed = %d\n", - sb->u.ext_sb.s_freeinodescount, count); - unlock_super (sb); - return count; -#else - return sb->u.ext_sb.s_freeinodescount; -#endif -} diff --git a/fs/ext/fsync.c b/fs/ext/fsync.c deleted file mode 100644 index c2385aac5..000000000 --- a/fs/ext/fsync.c +++ /dev/null @@ -1,185 +0,0 @@ - -/* - * linux/fs/ext/fsync.c - * - * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) - * from - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * from - * linux/fs/minix/truncate.c Copyright (C) 1991, 
1992 Linus Torvalds - * - * extfs fsync primitive - */ - -#include <asm/uaccess.h> -#include <asm/system.h> - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/locks.h> - -#include <linux/fs.h> -#include <linux/ext_fs.h> - - -#define blocksize BLOCK_SIZE -#define addr_per_block 256 - -static int sync_block (struct inode * inode, unsigned long * block, int wait) -{ - struct buffer_head * bh; - int tmp; - - if (!*block) - return 0; - tmp = *block; - bh = get_hash_table(inode->i_dev, *block, blocksize); - if (!bh) - return 0; - if (*block != tmp) { - brelse (bh); - return 1; - } - if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { - brelse(bh); - return -1; - } - if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) - { - brelse(bh); - return 0; - } - ll_rw_block(WRITE, 1, &bh); - bh->b_count--; - return 0; -} - -static int sync_iblock (struct inode * inode, unsigned long * iblock, - struct buffer_head **bh, int wait) -{ - int rc, tmp; - - *bh = NULL; - tmp = *iblock; - if (!tmp) - return 0; - rc = sync_block (inode, iblock, wait); - if (rc) - return rc; - *bh = bread(inode->i_dev, tmp, blocksize); - if (tmp != *iblock) { - brelse(*bh); - *bh = NULL; - return 1; - } - if (!*bh) - return -1; - return 0; -} - - -static int sync_direct(struct inode *inode, int wait) -{ - int i; - int rc, err = 0; - - for (i = 0; i < 9; i++) { - rc = sync_block (inode, inode->u.ext_i.i_data + i, wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - return err; -} - -static int sync_indirect(struct inode *inode, unsigned long *iblock, int wait) -{ - int i; - struct buffer_head * ind_bh; - int rc, err = 0; - - rc = sync_iblock (inode, iblock, &ind_bh, wait); - if (rc || !ind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_block (inode, - ((unsigned long *) ind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(ind_bh); - return err; -} - -static int 
sync_dindirect(struct inode *inode, unsigned long *diblock, - int wait) -{ - int i; - struct buffer_head * dind_bh; - int rc, err = 0; - - rc = sync_iblock (inode, diblock, &dind_bh, wait); - if (rc || !dind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_indirect (inode, - ((unsigned long *) dind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(dind_bh); - return err; -} - -static int sync_tindirect(struct inode *inode, unsigned long *tiblock, - int wait) -{ - int i; - struct buffer_head * tind_bh; - int rc, err = 0; - - rc = sync_iblock (inode, tiblock, &tind_bh, wait); - if (rc || !tind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_dindirect (inode, - ((unsigned long *) tind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(tind_bh); - return err; -} - -int ext_sync_file(struct inode * inode, struct file *file) -{ - int wait, err = 0; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return -EINVAL; - for (wait=0; wait<=1; wait++) - { - err |= sync_direct(inode, wait); - err |= sync_indirect(inode, inode->u.ext_i.i_data+9, wait); - err |= sync_dindirect(inode, inode->u.ext_i.i_data+10, wait); - err |= sync_tindirect(inode, inode->u.ext_i.i_data+11, wait); - } - err |= ext_sync_inode (inode); - return (err < 0) ? 
-EIO : 0; -} diff --git a/fs/ext/inode.c b/fs/ext/inode.c deleted file mode 100644 index 7ca1cd172..000000000 --- a/fs/ext/inode.c +++ /dev/null @@ -1,479 +0,0 @@ -/* - * linux/fs/ext/inode.c - * - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * - * from - * - * linux/fs/minix/inode.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#include <linux/module.h> - -#include <linux/sched.h> -#include <linux/ext_fs.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/locks.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -void ext_put_inode(struct inode *inode) -{ - if (inode->i_nlink) - return; - inode->i_size = 0; - ext_truncate(inode); - ext_free_inode(inode); -} - -void ext_put_super(struct super_block *sb) -{ - - lock_super(sb); - sb->s_dev = 0; - if (sb->u.ext_sb.s_firstfreeinodeblock) - brelse (sb->u.ext_sb.s_firstfreeinodeblock); - if (sb->u.ext_sb.s_firstfreeblock) - brelse (sb->u.ext_sb.s_firstfreeblock); - unlock_super(sb); - MOD_DEC_USE_COUNT; - return; -} - -static struct super_operations ext_sops = { - ext_read_inode, - NULL, - ext_write_inode, - ext_put_inode, - ext_put_super, - ext_write_super, - ext_statfs, - NULL -}; - -struct super_block *ext_read_super(struct super_block *s,void *data, - int silent) -{ - struct buffer_head *bh; - struct ext_super_block *es; - kdev_t dev = s->s_dev; - int block; - - MOD_INC_USE_COUNT; - lock_super(s); - set_blocksize(dev, BLOCK_SIZE); - if (!(bh = bread(dev, 1, BLOCK_SIZE))) { - s->s_dev = 0; - unlock_super(s); - printk("EXT-fs: unable to read superblock\n"); - MOD_DEC_USE_COUNT; - return NULL; - } - es = (struct ext_super_block *) bh->b_data; - s->s_blocksize = 1024; - s->s_blocksize_bits = 10; - s->u.ext_sb.s_ninodes = es->s_ninodes; - s->u.ext_sb.s_nzones = es->s_nzones; - s->u.ext_sb.s_firstdatazone = es->s_firstdatazone; - s->u.ext_sb.s_log_zone_size = es->s_log_zone_size; - s->u.ext_sb.s_max_size = es->s_max_size; - 
s->s_magic = es->s_magic; - s->u.ext_sb.s_firstfreeblocknumber = es->s_firstfreeblock; - s->u.ext_sb.s_freeblockscount = es->s_freeblockscount; - s->u.ext_sb.s_firstfreeinodenumber = es->s_firstfreeinode; - s->u.ext_sb.s_freeinodescount = es->s_freeinodescount; - brelse(bh); - if (s->s_magic != EXT_SUPER_MAGIC) { - s->s_dev = 0; - unlock_super(s); - if (!silent) - printk("VFS: Can't find an extfs filesystem on dev " - "%s.\n", kdevname(dev)); - MOD_DEC_USE_COUNT; - return NULL; - } - if (!s->u.ext_sb.s_firstfreeblocknumber) - s->u.ext_sb.s_firstfreeblock = NULL; - else - if (!(s->u.ext_sb.s_firstfreeblock = bread(dev, - s->u.ext_sb.s_firstfreeblocknumber, BLOCK_SIZE))) { - printk("ext_read_super: unable to read first free block\n"); - s->s_dev = 0; - unlock_super(s); - MOD_DEC_USE_COUNT; - return NULL; - } - if (!s->u.ext_sb.s_firstfreeinodenumber) - s->u.ext_sb.s_firstfreeinodeblock = NULL; - else { - block = 2 + (s->u.ext_sb.s_firstfreeinodenumber - 1) / EXT_INODES_PER_BLOCK; - if (!(s->u.ext_sb.s_firstfreeinodeblock = bread(dev, block, BLOCK_SIZE))) { - printk("ext_read_super: unable to read first free inode block\n"); - brelse(s->u.ext_sb.s_firstfreeblock); - s->s_dev = 0; - unlock_super (s); - MOD_DEC_USE_COUNT; - return NULL; - } - } - unlock_super(s); - /* set up enough so that it can read an inode */ - s->s_dev = dev; - s->s_op = &ext_sops; - if (!(s->s_mounted = iget(s,EXT_ROOT_INO))) { - s->s_dev = 0; - printk("EXT-fs: get root inode failed\n"); - MOD_DEC_USE_COUNT; - return NULL; - } - return s; -} - -void ext_write_super (struct super_block *sb) -{ - struct buffer_head * bh; - struct ext_super_block * es; - - if (!(bh = bread(sb->s_dev, 1, BLOCK_SIZE))) { - printk ("ext_write_super: bread failed\n"); - return; - } - es = (struct ext_super_block *) bh->b_data; - es->s_firstfreeblock = sb->u.ext_sb.s_firstfreeblocknumber; - es->s_freeblockscount = sb->u.ext_sb.s_freeblockscount; - es->s_firstfreeinode = sb->u.ext_sb.s_firstfreeinodenumber; - 
es->s_freeinodescount = sb->u.ext_sb.s_freeinodescount; - mark_buffer_dirty(bh, 1); - brelse (bh); - sb->s_dirt = 0; -} - -void ext_statfs (struct super_block *sb, struct statfs *buf, int bufsiz) -{ - struct statfs tmp; - - tmp.f_type = EXT_SUPER_MAGIC; - tmp.f_bsize = 1024; - tmp.f_blocks = sb->u.ext_sb.s_nzones << sb->u.ext_sb.s_log_zone_size; - tmp.f_bfree = ext_count_free_blocks(sb); - tmp.f_bavail = tmp.f_bfree; - tmp.f_files = sb->u.ext_sb.s_ninodes; - tmp.f_ffree = ext_count_free_inodes(sb); - tmp.f_namelen = EXT_NAME_LEN; - copy_to_user(buf, &tmp, bufsiz); -} - -#define inode_bmap(inode,nr) ((inode)->u.ext_i.i_data[(nr)]) - -static inline int block_bmap(struct buffer_head * bh, int nr) -{ - int tmp; - - if (!bh) - return 0; - tmp = ((unsigned long *) bh->b_data)[nr]; - brelse(bh); - return tmp; -} - -int ext_bmap(struct inode * inode,int block) -{ - int i; - - if (block<0) { - printk("ext_bmap: block<0"); - return 0; - } - if (block >= 9+256+256*256+256*256*256) { - printk("ext_bmap: block>big"); - return 0; - } - if (block<9) - return inode_bmap(inode,block); - block -= 9; - if (block<256) { - i = inode_bmap(inode,9); - if (!i) - return 0; - return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block); - } - block -= 256; - if (block<256*256) { - i = inode_bmap(inode,10); - if (!i) - return 0; - i = block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block>>8); - if (!i) - return 0; - return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block & 255); - } - block -= 256*256; - i = inode_bmap(inode,11); - if (!i) - return 0; - i = block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block>>16); - if (!i) - return 0; - i = block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),(block>>8) & 255); - if (!i) - return 0; - return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block & 255); -} - -static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create) -{ - int tmp; - unsigned long * p; - struct buffer_head * result; - - p = inode->u.ext_i.i_data + nr; -repeat: - tmp = *p; 
- if (tmp) { - result = getblk(inode->i_dev, tmp, BLOCK_SIZE); - if (tmp == *p) - return result; - brelse(result); - goto repeat; - } - if (!create) - return NULL; - tmp = ext_new_block(inode->i_sb); - if (!tmp) - return NULL; - result = getblk(inode->i_dev, tmp, BLOCK_SIZE); - if (*p) { - ext_free_block(inode->i_sb,tmp); - brelse(result); - goto repeat; - } - *p = tmp; - inode->i_ctime = CURRENT_TIME; - inode->i_dirt = 1; - return result; -} - -static struct buffer_head * block_getblk(struct inode * inode, - struct buffer_head * bh, int nr, int create) -{ - int tmp; - unsigned long * p; - struct buffer_head * result; - - if (!bh) - return NULL; - if (!buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - return NULL; - } - } - p = nr + (unsigned long *) bh->b_data; -repeat: - tmp = *p; - if (tmp) { - result = getblk(bh->b_dev, tmp, BLOCK_SIZE); - if (tmp == *p) { - brelse(bh); - return result; - } - brelse(result); - goto repeat; - } - if (!create) { - brelse(bh); - return NULL; - } - tmp = ext_new_block(inode->i_sb); - if (!tmp) { - brelse(bh); - return NULL; - } - result = getblk(bh->b_dev, tmp, BLOCK_SIZE); - if (*p) { - ext_free_block(inode->i_sb,tmp); - brelse(result); - goto repeat; - } - *p = tmp; - mark_buffer_dirty(bh, 1); - brelse(bh); - return result; -} - -struct buffer_head * ext_getblk(struct inode * inode, int block, int create) -{ - struct buffer_head * bh; - - if (block<0) { - printk("ext_getblk: block<0\n"); - return NULL; - } - if (block >= 9+256+256*256+256*256*256) { - printk("ext_getblk: block>big\n"); - return NULL; - } - if (block<9) - return inode_getblk(inode,block,create); - block -= 9; - if (block<256) { - bh = inode_getblk(inode,9,create); - return block_getblk(inode,bh,block,create); - } - block -= 256; - if (block<256*256) { - bh = inode_getblk(inode,10,create); - bh = block_getblk(inode,bh,block>>8,create); - return block_getblk(inode,bh,block & 255,create); - } - 
block -= 256*256; - bh = inode_getblk(inode,11,create); - bh = block_getblk(inode,bh,block>>16,create); - bh = block_getblk(inode,bh,(block>>8) & 255,create); - return block_getblk(inode,bh,block & 255,create); -} - -struct buffer_head * ext_bread(struct inode * inode, int block, int create) -{ - struct buffer_head * bh; - - bh = ext_getblk(inode,block,create); - if (!bh || buffer_uptodate(bh)) - return bh; - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return bh; - brelse(bh); - return NULL; -} - -void ext_read_inode(struct inode * inode) -{ - struct buffer_head * bh; - struct ext_inode * raw_inode; - int block; - - block = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK; - if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE))) - panic("unable to read i-node block"); - raw_inode = ((struct ext_inode *) bh->b_data) + - (inode->i_ino-1)%EXT_INODES_PER_BLOCK; - inode->i_mode = raw_inode->i_mode; - inode->i_uid = raw_inode->i_uid; - inode->i_gid = raw_inode->i_gid; - inode->i_nlink = raw_inode->i_nlinks; - inode->i_size = raw_inode->i_size; - inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time; - inode->i_blocks = inode->i_blksize = 0; - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - inode->i_rdev = to_kdev_t(raw_inode->i_zone[0]); - else for (block = 0; block < 12; block++) - inode->u.ext_i.i_data[block] = raw_inode->i_zone[block]; - brelse(bh); - inode->i_op = NULL; - if (S_ISREG(inode->i_mode)) - inode->i_op = &ext_file_inode_operations; - else if (S_ISDIR(inode->i_mode)) - inode->i_op = &ext_dir_inode_operations; - else if (S_ISLNK(inode->i_mode)) - inode->i_op = &ext_symlink_inode_operations; - else if (S_ISCHR(inode->i_mode)) - inode->i_op = &chrdev_inode_operations; - else if (S_ISBLK(inode->i_mode)) - inode->i_op = &blkdev_inode_operations; - else if (S_ISFIFO(inode->i_mode)) - init_fifo(inode); -} - -static struct buffer_head * ext_update_inode(struct inode * inode) -{ - struct buffer_head * bh; - struct 
ext_inode * raw_inode; - int block; - - block = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK; - if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE))) - panic("unable to read i-node block"); - raw_inode = ((struct ext_inode *)bh->b_data) + - (inode->i_ino-1)%EXT_INODES_PER_BLOCK; - raw_inode->i_mode = inode->i_mode; - raw_inode->i_uid = inode->i_uid; - raw_inode->i_gid = inode->i_gid; - raw_inode->i_nlinks = inode->i_nlink; - raw_inode->i_size = inode->i_size; - raw_inode->i_time = inode->i_mtime; - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - raw_inode->i_zone[0] = kdev_t_to_nr(inode->i_rdev); - else for (block = 0; block < 12; block++) - raw_inode->i_zone[block] = inode->u.ext_i.i_data[block]; - mark_buffer_dirty(bh, 1); - inode->i_dirt=0; - return bh; -} - -void ext_write_inode(struct inode * inode) -{ - struct buffer_head *bh; - bh = ext_update_inode (inode); - brelse(bh); -} - -int ext_sync_inode (struct inode *inode) -{ - int err = 0; - struct buffer_head *bh; - - bh = ext_update_inode(inode); - if (bh && buffer_dirty(bh)) - { - ll_rw_block(WRITE, 1, &bh); - wait_on_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { - printk ("IO error syncing ext inode [" - "%s:%08lx]\n", - kdevname(inode->i_dev), inode->i_ino); - err = -1; - } - } - else if (!bh) - err = -1; - brelse (bh); - return err; -} - - -static struct file_system_type ext_fs_type = { - ext_read_super, "ext", 1, NULL -}; - -int init_ext_fs(void) -{ - return register_filesystem(&ext_fs_type); -} - -#ifdef MODULE -int init_module(void) -{ - int status; - - if ((status = init_ext_fs()) == 0) - register_symtab(0); - return status; -} - -void cleanup_module(void) -{ - unregister_filesystem(&ext_fs_type); -} - -#endif diff --git a/fs/ext/namei.c b/fs/ext/namei.c deleted file mode 100644 index 0bbb771fb..000000000 --- a/fs/ext/namei.c +++ /dev/null @@ -1,903 +0,0 @@ -/* - * linux/fs/ext/namei.c - * - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * - * from - * - * linux/fs/minix/namei.c - 
* - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#include <linux/sched.h> -#include <linux/ext_fs.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/errno.h> - -#include <asm/uaccess.h> - -/* - * comment out this line if you want names > EXT_NAME_LEN chars to be - * truncated. Else they will be disallowed. - */ -/* #define NO_TRUNCATE */ - -/* - * EXT_DIR_PAD defines the directory entries boundaries - * - * NOTE: It must be a power of 2 and must be greater or equal than 8 - * because a directory entry needs 8 bytes for its fixed part - * (4 bytes for the inode, 2 bytes for the entry length and 2 bytes - * for the name length) - */ -#define EXT_DIR_PAD 8 - -/* - * - * EXT_DIR_MIN_SIZE is the minimal size of a directory entry - * - * During allocations, a directory entry is split into 2 ones - * *ONLY* if the size of the unused part is greater than or - * equal to EXT_DIR_MIN_SIZE - */ -#define EXT_DIR_MIN_SIZE 12 - -/* - * ok, we cannot use strncmp, as the name is not in our data space. - * Thus we'll have to use ext_match. No big problem. Match also makes - * some sanity tests. - * - * NOTE! unlike strncmp, ext_match returns 1 for success, 0 for failure. - */ -static int ext_match(int len,const char * name,struct ext_dir_entry * de) -{ - if (!de || !de->inode || len > EXT_NAME_LEN) - return 0; - /* "" means "." ---> so paths like "/usr/lib//libc.a" work */ - if (!len && (de->name[0]=='.') && (de->name[1]=='\0')) - return 1; - if (len != de->name_len) - return 0; - return !memcmp(name, de->name, len); -} - -/* - * ext_find_entry() - * - * finds an entry in the specified directory with the wanted name. It - * returns the cache buffer in which the entry was found, and the entry - * itself (as a parameter - res_dir). It does NOT read the inode of the - * entry - you'll have to do that yourself if you want to. 
- * - * addition for the ext file system : this function returns the previous - * and next directory entries in the parameters prev_dir and next_dir - */ -static struct buffer_head * ext_find_entry(struct inode * dir, - const char * name, int namelen, struct ext_dir_entry ** res_dir, - struct ext_dir_entry ** prev_dir, struct ext_dir_entry ** next_dir) -{ - long offset; - struct buffer_head * bh; - struct ext_dir_entry * de; - - *res_dir = NULL; - if (!dir) - return NULL; -#ifdef NO_TRUNCATE - if (namelen > EXT_NAME_LEN) - return NULL; -#else - if (namelen > EXT_NAME_LEN) - namelen = EXT_NAME_LEN; -#endif - bh = ext_bread(dir,0,0); - if (!bh) - return NULL; - if (prev_dir) - *prev_dir = NULL; - if (next_dir) - *next_dir = NULL; - offset = 0; - de = (struct ext_dir_entry *) bh->b_data; - while (offset < dir->i_size) { - if ((char *)de >= BLOCK_SIZE+bh->b_data) { - brelse(bh); - bh = NULL; - bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,0); - if (!bh) - continue; - de = (struct ext_dir_entry *) bh->b_data; - if (prev_dir) - *prev_dir = NULL; - } - if (de->rec_len < 8 || de->rec_len % 8 != 0 || - de->rec_len < de->name_len + 8 || - (((char *) de) + de->rec_len-1 >= BLOCK_SIZE+bh->b_data)) { - printk ("ext_find_entry: bad dir entry\n"); - printk ("dev=%s, dir=%ld, offset=%ld, " - "rec_len=%d, name_len=%d\n", - kdevname(dir->i_dev), dir->i_ino, offset, - de->rec_len, de->name_len); - de = (struct ext_dir_entry *) (bh->b_data+BLOCK_SIZE); - offset = ((offset / BLOCK_SIZE) + 1) * BLOCK_SIZE; - continue; -/* brelse (bh); - return NULL; */ - } - if (ext_match(namelen,name,de)) { - *res_dir = de; - if (next_dir) - if (offset + de->rec_len < dir->i_size && - ((char *)de) + de->rec_len < BLOCK_SIZE+bh->b_data) - *next_dir = (struct ext_dir_entry *) - ((char *) de + de->rec_len); - else - *next_dir = NULL; - return bh; - } - offset += de->rec_len; - if (prev_dir) - *prev_dir = de; - de = (struct ext_dir_entry *) ((char *) de + de->rec_len); - } - brelse(bh); - return NULL; -} - 
-int ext_lookup(struct inode * dir,const char * name, int len, - struct inode ** result) -{ - int ino; - struct ext_dir_entry * de; - struct buffer_head * bh; - - *result = NULL; - if (!dir) - return -ENOENT; - if (!S_ISDIR(dir->i_mode)) { - iput(dir); - return -ENOENT; - } - if (!(bh = ext_find_entry(dir,name,len,&de,NULL,NULL))) { - iput(dir); - return -ENOENT; - } - ino = de->inode; - brelse(bh); - if (!(*result = iget(dir->i_sb,ino))) { - iput(dir); - return -EACCES; - } - iput(dir); - return 0; -} - -/* - * ext_add_entry() - * - * adds a file entry to the specified directory, using the same - * semantics as ext_find_entry(). It returns NULL if it failed. - * - * NOTE!! The inode part of 'de' is left at 0 - which means you - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -static struct buffer_head * ext_add_entry(struct inode * dir, - const char * name, int namelen, struct ext_dir_entry ** res_dir) -{ - int i; - long offset; - unsigned short rec_len; - struct buffer_head * bh; - struct ext_dir_entry * de, * de1; - - *res_dir = NULL; - if (!dir) - return NULL; -#ifdef NO_TRUNCATE - if (namelen > EXT_NAME_LEN) - return NULL; -#else - if (namelen > EXT_NAME_LEN) - namelen = EXT_NAME_LEN; -#endif - if (!namelen) - return NULL; - bh = ext_bread(dir,0,0); - if (!bh) - return NULL; - rec_len = ((8 + namelen + EXT_DIR_PAD - 1) / EXT_DIR_PAD) * EXT_DIR_PAD; - offset = 0; - de = (struct ext_dir_entry *) bh->b_data; - while (1) { - if ((char *)de >= BLOCK_SIZE+bh->b_data && offset < dir->i_size) { -#ifdef EXTFS_DEBUG -printk ("ext_add_entry: skipping to next block\n"); -#endif - brelse(bh); - bh = NULL; - bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,0); - if (!bh) - return NULL; - de = (struct ext_dir_entry *) bh->b_data; - } - if (offset >= dir->i_size) { - /* Check that the directory entry fits in the block */ - if (offset % BLOCK_SIZE == 0 || - (BLOCK_SIZE - (offset % BLOCK_SIZE)) < 
rec_len) { - if ((offset % BLOCK_SIZE) != 0) { - /* If the entry does not fit in the - block, the remainder of the block - becomes an unused entry */ - de->inode = 0; - de->rec_len = BLOCK_SIZE - - (offset & (BLOCK_SIZE - 1)); - de->name_len = 0; - offset += de->rec_len; - dir->i_size += de->rec_len; - dir->i_dirt = 1; -#if 0 - dir->i_ctime = CURRENT_TIME; -#endif - mark_buffer_dirty(bh, 1); - } - brelse (bh); - bh = NULL; -#ifdef EXTFS_DEBUG -printk ("ext_add_entry : creating next block\n"); -#endif - bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,1); - if (!bh) - return NULL; /* Other thing to do ??? */ - de = (struct ext_dir_entry *) bh->b_data; - } - /* Allocate the entry */ - de->inode=0; - de->rec_len = rec_len; - dir->i_size += de->rec_len; - dir->i_dirt = 1; -#if 0 - dir->i_ctime = CURRENT_TIME; -#endif - } - if (de->rec_len < 8 || de->rec_len % 4 != 0 || - de->rec_len < de->name_len + 8 || - (((char *) de) + de->rec_len-1 >= BLOCK_SIZE+bh->b_data)) { - printk ("ext_addr_entry: bad dir entry\n"); - printk ("dev=%s, dir=%ld, offset=%ld, " - "rec_len=%d, name_len=%d\n", - kdevname(dir->i_dev), dir->i_ino, offset, - de->rec_len, de->name_len); - brelse (bh); - return NULL; - } - if (!de->inode && de->rec_len >= rec_len) { - if (de->rec_len > rec_len - && de->rec_len - rec_len >= EXT_DIR_MIN_SIZE) { - /* The found entry is too big : it is split - into 2 ones : - - the 1st one will be used to hold the name, - - the 2nd one is unused */ - de1 = (struct ext_dir_entry *) ((char *) de + rec_len); - de1->inode = 0; - de1->rec_len = de->rec_len - rec_len; - de1->name_len = 0; - de->rec_len = rec_len; - } - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - dir->i_dirt = 1; - de->name_len = namelen; - for (i=0; i < namelen ; i++) - de->name[i] = name[i]; - mark_buffer_dirty(bh, 1); - *res_dir = de; - return bh; - } - offset += de->rec_len; - de = (struct ext_dir_entry *) ((char *) de + de->rec_len); - } - brelse(bh); - return NULL; -} - -int ext_create(struct inode * dir,const 
char * name, int len, int mode, - struct inode ** result) -{ - struct inode * inode; - struct buffer_head * bh; - struct ext_dir_entry * de; - - *result = NULL; - if (!dir) - return -ENOENT; - inode = ext_new_inode(dir); - if (!inode) { - iput(dir); - return -ENOSPC; - } - inode->i_op = &ext_file_inode_operations; - inode->i_mode = mode; - inode->i_dirt = 1; - bh = ext_add_entry(dir,name,len,&de); - if (!bh) { - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - iput(dir); - return -ENOSPC; - } - de->inode = inode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - *result = inode; - return 0; -} - -int ext_mknod(struct inode * dir, const char * name, int len, int mode, int rdev) -{ - struct inode * inode; - struct buffer_head * bh; - struct ext_dir_entry * de; - - if (!dir) - return -ENOENT; - bh = ext_find_entry(dir,name,len,&de,NULL,NULL); - if (bh) { - brelse(bh); - iput(dir); - return -EEXIST; - } - inode = ext_new_inode(dir); - if (!inode) { - iput(dir); - return -ENOSPC; - } - inode->i_uid = current->fsuid; - inode->i_mode = mode; - inode->i_op = NULL; - if (S_ISREG(inode->i_mode)) - inode->i_op = &ext_file_inode_operations; - else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ext_dir_inode_operations; - if (dir->i_mode & S_ISGID) - inode->i_mode |= S_ISGID; - } - else if (S_ISLNK(inode->i_mode)) - inode->i_op = &ext_symlink_inode_operations; - else if (S_ISCHR(inode->i_mode)) - inode->i_op = &chrdev_inode_operations; - else if (S_ISBLK(inode->i_mode)) - inode->i_op = &blkdev_inode_operations; - else if (S_ISFIFO(inode->i_mode)) - init_fifo(inode); - if (S_ISBLK(mode) || S_ISCHR(mode)) - inode->i_rdev = to_kdev_t(rdev); -#if 0 - inode->i_mtime = inode->i_atime = CURRENT_TIME; -#endif - inode->i_dirt = 1; - bh = ext_add_entry(dir,name,len,&de); - if (!bh) { - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - iput(dir); - return -ENOSPC; - } - de->inode = inode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - 
iput(inode); - return 0; -} - -int ext_mkdir(struct inode * dir, const char * name, int len, int mode) -{ - struct inode * inode; - struct buffer_head * bh, *dir_block; - struct ext_dir_entry * de; - - bh = ext_find_entry(dir,name,len,&de,NULL,NULL); - if (bh) { - brelse(bh); - iput(dir); - return -EEXIST; - } - inode = ext_new_inode(dir); - if (!inode) { - iput(dir); - return -ENOSPC; - } - inode->i_op = &ext_dir_inode_operations; - inode->i_size = 2 * 16; /* Each entry is coded on 16 bytes for "." and ".." - - 4 bytes for the inode number, - - 2 bytes for the record length - - 2 bytes for the name length - - 8 bytes for the name */ -#if 0 - inode->i_mtime = inode->i_atime = CURRENT_TIME; -#endif - dir_block = ext_bread(inode,0,1); - if (!dir_block) { - iput(dir); - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - return -ENOSPC; - } - de = (struct ext_dir_entry *) dir_block->b_data; - de->inode=inode->i_ino; - de->rec_len=16; - de->name_len=1; - strcpy(de->name,"."); - de = (struct ext_dir_entry *) ((char *) de + de->rec_len); - de->inode = dir->i_ino; - de->rec_len=16; - de->name_len=2; - strcpy(de->name,".."); - inode->i_nlink = 2; - mark_buffer_dirty(dir_block, 1); - brelse(dir_block); - inode->i_mode = S_IFDIR | (mode & 0777 & ~current->fs->umask); - if (dir->i_mode & S_ISGID) - inode->i_mode |= S_ISGID; - inode->i_dirt = 1; - bh = ext_add_entry(dir,name,len,&de); - if (!bh) { - iput(dir); - inode->i_nlink=0; - iput(inode); - return -ENOSPC; - } - de->inode = inode->i_ino; - mark_buffer_dirty(bh, 1); - dir->i_nlink++; - dir->i_dirt = 1; - iput(dir); - iput(inode); - brelse(bh); - return 0; -} - -/* - * routine to check that the specified directory is empty (for rmdir) - */ -static int empty_dir(struct inode * inode) -{ - unsigned long offset; - struct buffer_head * bh; - struct ext_dir_entry * de, * de1; - - if (inode->i_size < 2 * 12 || !(bh = ext_bread(inode,0,0))) { - printk("warning - bad directory on dev %s\n", - kdevname(inode->i_dev)); - return 
1; - } - de = (struct ext_dir_entry *) bh->b_data; - de1 = (struct ext_dir_entry *) ((char *) de + de->rec_len); - if (de->inode != inode->i_ino || !de1->inode || - strcmp(".",de->name) || strcmp("..",de1->name)) { - printk("warning - bad directory on dev %s\n", - kdevname(inode->i_dev)); - return 1; - } - offset = de->rec_len + de1->rec_len; - de = (struct ext_dir_entry *) ((char *) de1 + de1->rec_len); - while (offset < inode->i_size ) { - if ((void *) de >= (void *) (bh->b_data+BLOCK_SIZE)) { - brelse(bh); - bh = ext_bread(inode, offset >> BLOCK_SIZE_BITS,1); - if (!bh) { - offset += BLOCK_SIZE; - continue; - } - de = (struct ext_dir_entry *) bh->b_data; - } - if (de->rec_len < 8 || de->rec_len %4 != 0 || - de->rec_len < de->name_len + 8) { - printk ("empty_dir: bad dir entry\n"); - printk ("dev=%s, dir=%ld, offset=%ld, " - "rec_len=%d, name_len=%d\n", - kdevname(inode->i_dev), inode->i_ino, - offset, de->rec_len, de->name_len); - brelse (bh); - return 1; - } - if (de->inode) { - brelse(bh); - return 0; - } - offset += de->rec_len; - de = (struct ext_dir_entry *) ((char *) de + de->rec_len); - } - brelse(bh); - return 1; -} - -static inline void ext_merge_entries (struct ext_dir_entry * de, - struct ext_dir_entry * pde, struct ext_dir_entry * nde) -{ - if (nde && !nde->inode) - de->rec_len += nde->rec_len; - if (pde && !pde->inode) - pde->rec_len += de->rec_len; -} - -int ext_rmdir(struct inode * dir, const char * name, int len) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext_dir_entry * de, * pde, * nde; - - inode = NULL; - bh = ext_find_entry(dir,name,len,&de,&pde,&nde); - retval = -ENOENT; - if (!bh) - goto end_rmdir; - retval = -EPERM; - if (!(inode = iget(dir->i_sb, de->inode))) - goto end_rmdir; - if ((dir->i_mode & S_ISVTX) && !fsuser() && - current->fsuid != inode->i_uid && - current->fsuid != dir->i_uid) - goto end_rmdir; - if (inode->i_dev != dir->i_dev) - goto end_rmdir; - if (inode == dir) /* we may not delete ".", 
but "../dir" is ok */ - goto end_rmdir; - if (!S_ISDIR(inode->i_mode)) { - retval = -ENOTDIR; - goto end_rmdir; - } - if (!empty_dir(inode)) { - retval = -ENOTEMPTY; - goto end_rmdir; - } - if (inode->i_count > 1) { - retval = -EBUSY; - goto end_rmdir; - } - if (inode->i_nlink != 2) - printk("empty directory has nlink!=2 (%d)\n",inode->i_nlink); - de->inode = 0; - de->name_len = 0; - ext_merge_entries (de, pde, nde); - mark_buffer_dirty(bh, 1); - inode->i_nlink=0; - inode->i_dirt=1; - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - dir->i_dirt=1; - retval = 0; -end_rmdir: - iput(dir); - iput(inode); - brelse(bh); - return retval; -} - -int ext_unlink(struct inode * dir, const char * name, int len) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext_dir_entry * de, * pde, * nde; - - retval = -ENOENT; - inode = NULL; - bh = ext_find_entry(dir,name,len,&de,&pde,&nde); - if (!bh) - goto end_unlink; - if (!(inode = iget(dir->i_sb, de->inode))) - goto end_unlink; - retval = -EPERM; - if ((dir->i_mode & S_ISVTX) && !fsuser() && - current->fsuid != inode->i_uid && - current->fsuid != dir->i_uid) - goto end_unlink; - if (S_ISDIR(inode->i_mode)) - goto end_unlink; - if (!inode->i_nlink) { - printk("Deleting nonexistent file (%s:%ld), %d\n", - kdevname(inode->i_dev), inode->i_ino, - inode->i_nlink); - inode->i_nlink=1; - } - de->inode = 0; - de->name_len = 0; - ext_merge_entries (de, pde, nde); - mark_buffer_dirty(bh, 1); - inode->i_nlink--; - inode->i_dirt = 1; - inode->i_ctime = CURRENT_TIME; - dir->i_ctime = dir->i_mtime = inode->i_ctime; - dir->i_dirt = 1; - retval = 0; -end_unlink: - brelse(bh); - iput(inode); - iput(dir); - return retval; -} - -int ext_symlink(struct inode * dir, const char * name, int len, const char * symname) -{ - struct ext_dir_entry * de; - struct inode * inode = NULL; - struct buffer_head * bh = NULL, * name_block = NULL; - int i; - char c; - - if (!(inode = ext_new_inode(dir))) { - 
iput(dir); - return -ENOSPC; - } - inode->i_mode = S_IFLNK | 0777; - inode->i_op = &ext_symlink_inode_operations; - name_block = ext_bread(inode,0,1); - if (!name_block) { - iput(dir); - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - return -ENOSPC; - } - i = 0; - while (i < 1023 && (c = *(symname++))) - name_block->b_data[i++] = c; - name_block->b_data[i] = 0; - mark_buffer_dirty(name_block, 1); - brelse(name_block); - inode->i_size = i; - inode->i_dirt = 1; - bh = ext_find_entry(dir,name,len,&de,NULL,NULL); - if (bh) { - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - brelse(bh); - iput(dir); - return -EEXIST; - } - bh = ext_add_entry(dir,name,len,&de); - if (!bh) { - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - iput(dir); - return -ENOSPC; - } - de->inode = inode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - iput(inode); - return 0; -} - -int ext_link(struct inode * oldinode, struct inode * dir, const char * name, int len) -{ - struct ext_dir_entry * de; - struct buffer_head * bh; - - if (S_ISDIR(oldinode->i_mode)) { - iput(oldinode); - iput(dir); - return -EPERM; - } - if (oldinode->i_nlink > 32000) { - iput(oldinode); - iput(dir); - return -EMLINK; - } - bh = ext_find_entry(dir,name,len,&de,NULL,NULL); - if (bh) { - brelse(bh); - iput(dir); - iput(oldinode); - return -EEXIST; - } - bh = ext_add_entry(dir,name,len,&de); - if (!bh) { - iput(dir); - iput(oldinode); - return -ENOSPC; - } - de->inode = oldinode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - oldinode->i_nlink++; - oldinode->i_ctime = CURRENT_TIME; - oldinode->i_dirt = 1; - iput(oldinode); - return 0; -} - -static int subdir(struct inode * new_inode, struct inode * old_inode) -{ - int ino; - int result; - - new_inode->i_count++; - result = 0; - for (;;) { - if (new_inode == old_inode) { - result = 1; - break; - } - if (new_inode->i_dev != old_inode->i_dev) - break; - ino = new_inode->i_ino; - if (ext_lookup(new_inode,"..",2,&new_inode)) 
- break; - if (new_inode->i_ino == ino) - break; - } - iput(new_inode); - return result; -} - -#define PARENT_INO(buffer) \ -((struct ext_dir_entry *) ((char *) buffer + \ -((struct ext_dir_entry *) buffer)->rec_len))->inode - -#define PARENT_NAME(buffer) \ -((struct ext_dir_entry *) ((char *) buffer + \ -((struct ext_dir_entry *) buffer)->rec_len))->name - -/* - * rename uses retrying to avoid race-conditions: at least they should be minimal. - * it tries to allocate all the blocks, then sanity-checks, and if the sanity- - * checks fail, it tries to restart itself again. Very practical - no changes - * are done until we know everything works ok.. and then all the changes can be - * done in one fell swoop when we have claimed all the buffers needed. - * - * Anybody can rename anything with this: the permission checks are left to the - * higher-level routines. - */ -static int do_ext_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len) -{ - struct inode * old_inode, * new_inode; - struct buffer_head * old_bh, * new_bh, * dir_bh; - struct ext_dir_entry * old_de, * new_de, * pde, * nde; - int retval; - - goto start_up; -try_again: - brelse(old_bh); - brelse(new_bh); - brelse(dir_bh); - iput(old_inode); - iput(new_inode); - current->counter = 0; - schedule(); -start_up: - old_inode = new_inode = NULL; - old_bh = new_bh = dir_bh = NULL; - old_bh = ext_find_entry(old_dir,old_name,old_len,&old_de,&pde,&nde); - retval = -ENOENT; - if (!old_bh) - goto end_rename; - old_inode = __iget(old_dir->i_sb, old_de->inode,0); /* don't cross mnt-points */ - if (!old_inode) - goto end_rename; - retval = -EPERM; - if ((old_dir->i_mode & S_ISVTX) && - current->fsuid != old_inode->i_uid && - current->fsuid != old_dir->i_uid && !fsuser()) - goto end_rename; - new_bh = ext_find_entry(new_dir,new_name,new_len,&new_de,NULL,NULL); - if (new_bh) { - new_inode = __iget(new_dir->i_sb, new_de->inode,0); /* don't cross 
mnt-points */ - if (!new_inode) { - brelse(new_bh); - new_bh = NULL; - } - } - if (new_inode == old_inode) { - retval = 0; - goto end_rename; - } - if (new_inode && S_ISDIR(new_inode->i_mode)) { - retval = -EEXIST; - goto end_rename; - } - retval = -EPERM; - if (new_inode && (new_dir->i_mode & S_ISVTX) && - current->fsuid != new_inode->i_uid && - current->fsuid != new_dir->i_uid && !fsuser()) - goto end_rename; - if (S_ISDIR(old_inode->i_mode)) { - retval = -EEXIST; - if (new_bh) - goto end_rename; - if ((retval = permission(old_inode, MAY_WRITE)) != 0) - goto end_rename; - retval = -EINVAL; - if (subdir(new_dir, old_inode)) - goto end_rename; - retval = -EIO; - dir_bh = ext_bread(old_inode,0,0); - if (!dir_bh) - goto end_rename; - if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) - goto end_rename; - } - if (!new_bh) - new_bh = ext_add_entry(new_dir,new_name,new_len,&new_de); - retval = -ENOSPC; - if (!new_bh) - goto end_rename; -/* sanity checking before doing the rename - avoid races */ - if (new_inode && (new_de->inode != new_inode->i_ino)) - goto try_again; - if (new_de->inode && !new_inode) - goto try_again; - if (old_de->inode != old_inode->i_ino) - goto try_again; -/* ok, that's it */ - old_de->inode = 0; - old_de->name_len = 0; - new_de->inode = old_inode->i_ino; - ext_merge_entries (old_de, pde, nde); - if (new_inode) { - new_inode->i_nlink--; - new_inode->i_dirt = 1; - } - mark_buffer_dirty(old_bh, 1); - mark_buffer_dirty(new_bh, 1); - if (dir_bh) { - PARENT_INO(dir_bh->b_data) = new_dir->i_ino; - mark_buffer_dirty(dir_bh, 1); - old_dir->i_nlink--; - new_dir->i_nlink++; - old_dir->i_dirt = 1; - new_dir->i_dirt = 1; - } - retval = 0; -end_rename: - brelse(dir_bh); - brelse(old_bh); - brelse(new_bh); - iput(old_inode); - iput(new_inode); - iput(old_dir); - iput(new_dir); - return retval; -} - -/* - * Ok, rename also locks out other renames, as they can change the parent of - * a directory, and we don't want any races. 
Other races are checked for by - * "do_rename()", which restarts if there are inconsistencies. - * - * Note that there is no race between different filesystems: it's only within - * the same device that races occur: many renames can happen at once, as long - * as they are on different partitions. - */ -int ext_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len, - int must_be_dir) -{ - static struct wait_queue * wait = NULL; - static int lock = 0; - int result; - - while (lock) - sleep_on(&wait); - lock = 1; - result = do_ext_rename(old_dir, old_name, old_len, - new_dir, new_name, new_len); - lock = 0; - wake_up(&wait); - return result; -} diff --git a/fs/ext/symlink.c b/fs/ext/symlink.c deleted file mode 100644 index 6dd04439d..000000000 --- a/fs/ext/symlink.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * linux/fs/ext/symlink.c - * - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * - * from - * - * linux/fs/minix/symlink.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext symlink handling code - */ - -#include <asm/uaccess.h> - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/ext_fs.h> -#include <linux/stat.h> - -static int ext_readlink(struct inode *, char *, int); -static int ext_follow_link(struct inode *, struct inode *, int, int, struct inode **); - -/* - * symlinks can't do much... 
- */ -struct inode_operations ext_symlink_inode_operations = { - NULL, /* no file-operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - ext_readlink, /* readlink */ - ext_follow_link, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ -}; - -static int ext_follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - int error; - struct buffer_head * bh; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput(dir); - iput(inode); - return -ELOOP; - } - if (!(bh = ext_bread(inode, 0, 0))) { - iput(inode); - iput(dir); - return -EIO; - } - iput(inode); - current->link_count++; - error = open_namei(bh->b_data,flag,mode,res_inode,dir); - current->link_count--; - brelse(bh); - return error; -} - -static int ext_readlink(struct inode * inode, char * buffer, int buflen) -{ - struct buffer_head * bh; - int i; - char c; - - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } - if (buflen > 1023) - buflen = 1023; - bh = ext_bread(inode, 0, 0); - iput(inode); - if (!bh) - return 0; - i = 0; - while (i<buflen && (c = bh->b_data[i])) { - i++; - put_user(c,buffer++); - } - brelse(bh); - return i; -} diff --git a/fs/ext/truncate.c b/fs/ext/truncate.c deleted file mode 100644 index 995fc5506..000000000 --- a/fs/ext/truncate.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * linux/fs/ext/truncate.c - * - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * - * from - * - * linux/fs/minix/truncate.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#include <linux/sched.h> -#include <linux/ext_fs.h> 
-#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/errno.h> - -/* - * Truncate has the most races in the whole filesystem: coding it is - * a pain in the a**. Especially as I don't do any locking... - * - * The code may look a bit weird, but that's just because I've tried to - * handle things like file-size changes in a somewhat graceful manner. - * Anyway, truncating a file at the same time somebody else writes to it - * is likely to result in pretty weird behaviour... - * - * The new code handles normal truncates (size = 0) as well as the more - * general case (size = XXX). I hope. - */ - -static int trunc_direct(struct inode * inode) -{ - int i, tmp; - unsigned long * p; - struct buffer_head * bh; - int retry = 0; -#define DIRECT_BLOCK ((inode->i_size + 1023) >> 10) - -repeat: - for (i = DIRECT_BLOCK ; i < 9 ; i++) { - p = inode->u.ext_i.i_data+i; - if (!(tmp = *p)) - continue; - bh = getblk(inode->i_dev,tmp,BLOCK_SIZE); - if (i < DIRECT_BLOCK) { - brelse(bh); - goto repeat; - } - if ((bh && bh->b_count != 1) || tmp != *p) { - retry = 1; - brelse(bh); - continue; - } - *p = 0; - inode->i_dirt = 1; - brelse(bh); - ext_free_block(inode->i_sb,tmp); - } - return retry; -} - -static int trunc_indirect(struct inode * inode, int offset, unsigned long * p) -{ - int i, tmp; - struct buffer_head * bh; - struct buffer_head * ind_bh; - unsigned long * ind; - int retry = 0; -#define INDIRECT_BLOCK (DIRECT_BLOCK-offset) - - tmp = *p; - if (!tmp) - return 0; - ind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); - if (tmp != *p) { - brelse(ind_bh); - return 1; - } - if (!ind_bh) { - *p = 0; - return 0; - } -repeat: - for (i = INDIRECT_BLOCK ; i < 256 ; i++) { - if (i < 0) - i = 0; - if (i < INDIRECT_BLOCK) - goto repeat; - ind = i+(unsigned long *) ind_bh->b_data; - tmp = *ind; - if (!tmp) - continue; - bh = getblk(inode->i_dev,tmp,BLOCK_SIZE); - if (i < INDIRECT_BLOCK) { - brelse(bh); - goto repeat; - } - if ((bh && bh->b_count != 1) || tmp != *ind) { - retry = 1; 
- brelse(bh); - continue; - } - *ind = 0; - mark_buffer_dirty(ind_bh, 1); - brelse(bh); - ext_free_block(inode->i_sb,tmp); - } - ind = (unsigned long *) ind_bh->b_data; - for (i = 0; i < 256; i++) - if (*(ind++)) - break; - if (i >= 256) - if (ind_bh->b_count != 1) - retry = 1; - else { - tmp = *p; - *p = 0; - inode->i_dirt = 1; - ext_free_block(inode->i_sb,tmp); - } - brelse(ind_bh); - return retry; -} - -static int trunc_dindirect(struct inode * inode, int offset, unsigned long * p) -{ - int i,tmp; - struct buffer_head * dind_bh; - unsigned long * dind; - int retry = 0; -#define DINDIRECT_BLOCK ((DIRECT_BLOCK-offset)>>8) - - tmp = *p; - if (!tmp) - return 0; - dind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); - if (tmp != *p) { - brelse(dind_bh); - return 1; - } - if (!dind_bh) { - *p = 0; - return 0; - } -repeat: - for (i = DINDIRECT_BLOCK ; i < 256 ; i ++) { - if (i < 0) - i = 0; - if (i < DINDIRECT_BLOCK) - goto repeat; - dind = i+(unsigned long *) dind_bh->b_data; - tmp = *dind; - if (!tmp) - continue; - retry |= trunc_indirect(inode,offset+(i<<8),dind); - mark_buffer_dirty(dind_bh, 1); - } - dind = (unsigned long *) dind_bh->b_data; - for (i = 0; i < 256; i++) - if (*(dind++)) - break; - if (i >= 256) - if (dind_bh->b_count != 1) - retry = 1; - else { - tmp = *p; - *p = 0; - inode->i_dirt = 1; - ext_free_block(inode->i_sb,tmp); - } - brelse(dind_bh); - return retry; -} - -static int trunc_tindirect(struct inode * inode) -{ - int i,tmp; - struct buffer_head * tind_bh; - unsigned long * tind, * p; - int retry = 0; -#define TINDIRECT_BLOCK ((DIRECT_BLOCK-(256*256+256+9))>>16) - - p = inode->u.ext_i.i_data+11; - if (!(tmp = *p)) - return 0; - tind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); - if (tmp != *p) { - brelse(tind_bh); - return 1; - } - if (!tind_bh) { - *p = 0; - return 0; - } -repeat: - for (i = TINDIRECT_BLOCK ; i < 256 ; i ++) { - if (i < 0) - i = 0; - if (i < TINDIRECT_BLOCK) - goto repeat; - tind = i+(unsigned long *) tind_bh->b_data; - retry |= 
trunc_dindirect(inode,9+256+256*256+(i<<16),tind); - mark_buffer_dirty(tind_bh, 1); - } - tind = (unsigned long *) tind_bh->b_data; - for (i = 0; i < 256; i++) - if (*(tind++)) - break; - if (i >= 256) - if (tind_bh->b_count != 1) - retry = 1; - else { - tmp = *p; - *p = 0; - inode->i_dirt = 1; - ext_free_block(inode->i_sb,tmp); - } - brelse(tind_bh); - return retry; -} - -void ext_truncate(struct inode * inode) -{ - int retry; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - while (1) { - retry = trunc_direct(inode); - retry |= trunc_indirect(inode,9,inode->u.ext_i.i_data+9); - retry |= trunc_dindirect(inode,9+256,inode->u.ext_i.i_data+10); - retry |= trunc_tindirect(inode); - if (!retry) - break; - current->counter = 0; - schedule(); - } - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_dirt = 1; -} - -/* - * Called when an inode is released. Note that this is different - * from ext_open: open gets called at every open, but release - * gets called only when /all/ the files are closed. 
- */ -void ext_release(struct inode * inode, struct file * filp) -{ - printk("ext_release not implemented\n"); -} diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 7de729e4b..fce6fc4c8 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -39,7 +39,7 @@ static struct file_operations ext2_dir_operations = { ext2_dir_read, /* read */ NULL, /* write - bad */ ext2_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ ext2_ioctl, /* ioctl */ NULL, /* mmap */ NULL, /* no special open code */ @@ -211,7 +211,7 @@ revalidate: offset = 0; brelse (bh); } - if (!IS_RDONLY(inode)) { + if (DO_UPDATE_ATIME(inode)) { inode->i_atime = CURRENT_TIME; inode->i_dirt = 1; } diff --git a/fs/ext2/file.c b/fs/ext2/file.c index c336a5ba6..274dc31fd 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -38,7 +38,7 @@ static long long ext2_file_lseek(struct inode *, struct file *, long long, int); static long ext2_file_write (struct inode *, struct file *, const char *, unsigned long); -static void ext2_release_file (struct inode *, struct file *); +static int ext2_release_file (struct inode *, struct file *); /* * We have mostly NULL's here: the current defaults are ok for @@ -49,7 +49,7 @@ static struct file_operations ext2_file_operations = { generic_file_read, /* read */ ext2_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ ext2_ioctl, /* ioctl */ generic_file_mmap, /* mmap */ NULL, /* no special open is needed */ @@ -120,7 +120,7 @@ static inline void remove_suid(struct inode *inode) /* was any of the uid bits set? */ mode &= inode->i_mode; - if (mode && suser()) { + if (mode && !suser()) { inode->i_mode &= ~mode; inode->i_dirt = 1; } @@ -260,8 +260,9 @@ static long ext2_file_write (struct inode * inode, struct file * filp, * from ext2_open: open gets called at every open, but release * gets called only when /all/ the files are closed. 
*/ -static void ext2_release_file (struct inode * inode, struct file * filp) +static int ext2_release_file (struct inode * inode, struct file * filp) { if (filp->f_mode & 2) ext2_discard_prealloc (inode); + return 0; } diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c index 1d608d07d..9993af1a6 100644 --- a/fs/ext2/fsync.c +++ b/fs/ext2/fsync.c @@ -13,10 +13,15 @@ * * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * Removed unnecessary code duplication for little endian machines + * and excessive __inline__s. + * Andi Kleen, 1997 */ #include <asm/uaccess.h> #include <asm/system.h> +#include <asm/byteorder.h> #include <linux/errno.h> #include <linux/fs.h> @@ -30,7 +35,7 @@ #define blocksize (EXT2_BLOCK_SIZE(inode->i_sb)) #define addr_per_block (EXT2_ADDR_PER_BLOCK(inode->i_sb)) -static __inline__ int sync_block (struct inode * inode, u32 * block, int wait) +static int sync_block (struct inode * inode, u32 * block, int wait) { struct buffer_head * bh; int tmp; @@ -58,7 +63,8 @@ static __inline__ int sync_block (struct inode * inode, u32 * block, int wait) return 0; } -static __inline__ int sync_block_swab32 (struct inode * inode, u32 * block, int wait) +#ifndef __LITTLE_ENDIAN +static int sync_block_swab32 (struct inode * inode, u32 * block, int wait) { struct buffer_head * bh; int tmp; @@ -85,8 +91,12 @@ static __inline__ int sync_block_swab32 (struct inode * inode, u32 * block, int bh->b_count--; return 0; } +#else +#define sync_block_swab32 sync_block +#endif + -static __inline__ int sync_iblock (struct inode * inode, u32 * iblock, +static int sync_iblock (struct inode * inode, u32 * iblock, struct buffer_head ** bh, int wait) { int rc, tmp; @@ -109,7 +119,8 @@ static __inline__ int sync_iblock (struct inode * inode, u32 * iblock, return 0; } -static __inline__ int sync_iblock_swab32 (struct inode * inode, u32 * iblock, +#ifndef __LITTLE_ENDIAN +static int sync_iblock_swab32 (struct inode * inode, u32 * 
iblock, struct buffer_head ** bh, int wait) { int rc, tmp; @@ -131,9 +142,11 @@ static __inline__ int sync_iblock_swab32 (struct inode * inode, u32 * iblock, return -1; return 0; } +#else +#define sync_iblock_swab32 sync_iblock +#endif - -static __inline__ int sync_direct (struct inode * inode, int wait) +static int sync_direct (struct inode * inode, int wait) { int i; int rc, err = 0; @@ -148,7 +161,7 @@ static __inline__ int sync_direct (struct inode * inode, int wait) return err; } -static __inline__ int sync_indirect (struct inode * inode, u32 * iblock, int wait) +static int sync_indirect (struct inode * inode, u32 * iblock, int wait) { int i; struct buffer_head * ind_bh; @@ -171,6 +184,7 @@ static __inline__ int sync_indirect (struct inode * inode, u32 * iblock, int wai return err; } +#ifndef __LITTLE_ENDIAN static __inline__ int sync_indirect_swab32 (struct inode * inode, u32 * iblock, int wait) { int i; @@ -193,8 +207,11 @@ static __inline__ int sync_indirect_swab32 (struct inode * inode, u32 * iblock, brelse (ind_bh); return err; } +#else +#define sync_indirect_swab32 sync_indirect +#endif -static __inline__ int sync_dindirect (struct inode * inode, u32 * diblock, int wait) +static int sync_dindirect (struct inode * inode, u32 * diblock, int wait) { int i; struct buffer_head * dind_bh; @@ -217,6 +234,7 @@ static __inline__ int sync_dindirect (struct inode * inode, u32 * diblock, int w return err; } +#ifndef __LITTLE_ENDIAN static __inline__ int sync_dindirect_swab32 (struct inode * inode, u32 * diblock, int wait) { int i; @@ -239,8 +257,11 @@ static __inline__ int sync_dindirect_swab32 (struct inode * inode, u32 * diblock brelse (dind_bh); return err; } +#else +#define sync_dindirect_swab32 sync_dindirect +#endif -static __inline__ int sync_tindirect (struct inode * inode, u32 * tiblock, int wait) +static int sync_tindirect (struct inode * inode, u32 * tiblock, int wait) { int i; struct buffer_head * tind_bh; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c 
index eddcc5ab5..5a876261e 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -329,42 +329,6 @@ repeat: return result; } -static int block_getcluster (struct inode * inode, struct buffer_head * bh, - int nr, - int blocksize) -{ - u32 * p; - int firstblock = 0; - int result = 0; - int i; - - /* Check to see if clustering possible here. */ - - if(!bh) return 0; - - if((nr & ((PAGE_SIZE >> EXT2_BLOCK_SIZE_BITS(inode->i_sb)) - 1)) != 0) - goto out; - if(nr + 3 > EXT2_ADDR_PER_BLOCK(inode->i_sb)) goto out; - - for(i=0; i< (PAGE_SIZE >> EXT2_BLOCK_SIZE_BITS(inode->i_sb)); i++) { - p = (u32 *) bh->b_data + nr + i; - - /* All blocks in cluster must already be allocated */ - if(le32_to_cpu(*p) == 0) goto out; - - /* See if aligned correctly */ - if(i==0) firstblock = le32_to_cpu(*p); - else if(le32_to_cpu(*p) != firstblock + i) goto out; - } - - p = (u32 *) bh->b_data + nr; - result = generate_cluster_swab32(bh->b_dev, (int *) p, blocksize); - - out: - brelse(bh); - return result; -} - struct buffer_head * ext2_getblk (struct inode * inode, long block, int create, int * err) { @@ -427,56 +391,6 @@ struct buffer_head * ext2_getblk (struct inode * inode, long block, inode->i_sb->s_blocksize, b, err); } -int ext2_getcluster (struct inode * inode, long block) -{ - struct buffer_head * bh; - int err, create; - unsigned long b; - unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); - int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); - - create = 0; - err = -EIO; - if (block < 0) { - ext2_warning (inode->i_sb, "ext2_getblk", "block < 0"); - return 0; - } - if (block > EXT2_NDIR_BLOCKS + addr_per_block + - (1 << (addr_per_block_bits * 2)) + - ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) { - ext2_warning (inode->i_sb, "ext2_getblk", "block > big"); - return 0; - } - - err = -ENOSPC; - b = block; - if (block < EXT2_NDIR_BLOCKS) return 0; - - block -= EXT2_NDIR_BLOCKS; - - if (block < addr_per_block) { - bh = inode_getblk (inode, 
EXT2_IND_BLOCK, create, b, &err); - return block_getcluster (inode, bh, block, - inode->i_sb->s_blocksize); - } - block -= addr_per_block; - if (block < (1 << (addr_per_block_bits * 2))) { - bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, &err); - bh = block_getblk (inode, bh, block >> addr_per_block_bits, - create, inode->i_sb->s_blocksize, b, &err); - return block_getcluster (inode, bh, block & (addr_per_block - 1), - inode->i_sb->s_blocksize); - } - block -= (1 << (addr_per_block_bits * 2)); - bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, &err); - bh = block_getblk (inode, bh, block >> (addr_per_block_bits * 2), - create, inode->i_sb->s_blocksize, b, &err); - bh = block_getblk (inode, bh, (block >> addr_per_block_bits) & (addr_per_block - 1), - create, inode->i_sb->s_blocksize, b, &err); - return block_getcluster (inode, bh, block & (addr_per_block - 1), - inode->i_sb->s_blocksize); -} - struct buffer_head * ext2_bread (struct inode * inode, int block, int create, int *err) { diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 8235a6301..0892ce79f 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -19,23 +19,16 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) { - int err; unsigned long flags; ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { case EXT2_IOC_GETFLAGS: - err = verify_area(VERIFY_WRITE, (int *) arg, sizeof(int)); - if (err) - return err; - put_user(inode->u.ext2_i.i_flags, (int *) arg); - return 0; + return put_user(inode->u.ext2_i.i_flags, (int *) arg); case EXT2_IOC_SETFLAGS: - err = verify_area(VERIFY_READ, (int *) arg, sizeof(int)); - if (err) - return err; - get_user(flags, (int *) arg); + if (get_user(flags, (int *) arg)) + return -EFAULT; /* * The IMMUTABLE and APPEND_ONLY flags can only be changed by * the super user when the security level is zero. 
@@ -64,20 +57,14 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, inode->i_dirt = 1; return 0; case EXT2_IOC_GETVERSION: - err = verify_area(VERIFY_WRITE, (int *) arg, sizeof(int)); - if (err) - return err; - put_user(inode->u.ext2_i.i_version, (int *) arg); - return 0; + return put_user(inode->u.ext2_i.i_version, (int *) arg); case EXT2_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !fsuser()) return -EPERM; if (IS_RDONLY(inode)) return -EROFS; - err = verify_area(VERIFY_READ, (int *) arg, sizeof(int)); - if (err) - return err; - get_user(inode->u.ext2_i.i_version, (int *) arg); + if (get_user(inode->u.ext2_i.i_version, (int *) arg)) + return -EFAULT; inode->i_ctime = CURRENT_TIME; inode->i_dirt = 1; return 0; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 6bcdb5e20..26e18852e 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -32,6 +32,8 @@ #include <linux/stat.h> #include <linux/string.h> #include <linux/locks.h> +#include <linux/blkdev.h> +#include <linux/init.h> static char error_buf[1024]; @@ -129,7 +131,7 @@ void ext2_put_super (struct super_block * sb) return; } -static struct super_operations ext2_sops = { +static struct super_operations ext2_sops = { ext2_read_inode, NULL, ext2_write_inode, @@ -377,10 +379,26 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data, unsigned short resuid = EXT2_DEF_RESUID; unsigned short resgid = EXT2_DEF_RESGID; unsigned long logic_sb_block = 1; + unsigned long offset = 0; kdev_t dev = sb->s_dev; + int blocksize = BLOCK_SIZE; + int hblock; int db_count; int i, j; + /* + * See what the current blocksize for the device is, and + * use that as the blocksize. Otherwise (or if the blocksize + * is smaller than the default) use the default. + * This is important for devices that have a hardware + * sectorsize that is larger than the default. 
+ */ + blocksize = get_hardblocksize(dev); + if( blocksize == 0 || blocksize < BLOCK_SIZE ) + { + blocksize = BLOCK_SIZE; + } + sb->u.ext2_sb.s_mount_opt = 0; set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL); if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, @@ -391,8 +409,19 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data, MOD_INC_USE_COUNT; lock_super (sb); - set_blocksize (dev, BLOCK_SIZE); - if (!(bh = bread (dev, sb_block, BLOCK_SIZE))) { + set_blocksize (dev, blocksize); + + /* + * If the superblock doesn't start on a sector boundary, + * calculate the offset. FIXME(eric) this doesn't make sense + * that we would have to do this. + */ + if (blocksize != BLOCK_SIZE) { + logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize; + offset = (sb_block*BLOCK_SIZE) % blocksize; + } + + if (!(bh = bread (dev, logic_sb_block, blocksize))) { sb->s_dev = 0; unlock_super (sb); printk ("EXT2-fs: unable to read superblock\n"); @@ -403,7 +432,7 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data, * Note: s_es must be initialized s_es as soon as possible because * some ext2 macro-instructions depend on its value */ - es = (struct ext2_super_block *) bh->b_data; + es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); sb->u.ext2_sb.s_es = es; sb->s_magic = le16_to_cpu(es->s_magic); if (sb->s_magic != EXT2_SUPER_MAGIC) { @@ -421,24 +450,34 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data, if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) { if (le32_to_cpu(es->s_feature_incompat) & ~EXT2_FEATURE_INCOMPAT_SUPP) { printk("EXT2-fs: %s: couldn't mount because of " - "unsupported optional features.\n", + "unsupported optional features.\n", kdevname(dev)); goto failed_mount; } if (!(sb->s_flags & MS_RDONLY) && (le32_to_cpu(es->s_feature_ro_compat) & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { printk("EXT2-fs: %s: couldn't mount RDWR because of " - "unsupported optional features.\n", + 
"unsupported optional features.\n", kdevname(dev)); goto failed_mount; } } sb->s_blocksize_bits = le32_to_cpu(sb->u.ext2_sb.s_es->s_log_block_size) + 10; sb->s_blocksize = 1 << sb->s_blocksize_bits; - if (sb->s_blocksize != BLOCK_SIZE && - (sb->s_blocksize == 1024 || sb->s_blocksize == 2048 || + if (sb->s_blocksize != BLOCK_SIZE && + (sb->s_blocksize == 1024 || sb->s_blocksize == 2048 || sb->s_blocksize == 4096)) { - unsigned long offset; + /* + * Make sure the blocksize for the filesystem is larger + * than the hardware sectorsize for the machine. + */ + hblock = get_hardblocksize(dev); + if( (hblock != 0) + && (sb->s_blocksize < hblock) ) + { + printk("EXT2-fs: blocksize too small for device.\n"); + goto failed_mount; + } brelse (bh); set_blocksize (dev, sb->s_blocksize); @@ -675,7 +714,7 @@ int ext2_remount (struct super_block * sb, int * flags, char * data) else { /* * Mounting a RDONLY partition read-write, so reread and - * store the current valid flag. (It may have been changed + * store the current valid flag. (It may have been changed * by e2fsck since we originally mounted the partition.) 
*/ sb->u.ext2_sb.s_mount_state = le16_to_cpu(es->s_state); @@ -689,19 +728,17 @@ static struct file_system_type ext2_fs_type = { ext2_read_super, "ext2", 1, NULL }; -int init_ext2_fs(void) +__initfunc(int init_ext2_fs(void)) { return register_filesystem(&ext2_fs_type); } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - - if ((status = init_ext2_fs()) == 0) - register_symtab(0); - return status; + return init_ext2_fs(); } void cleanup_module(void) diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 461e038c2..31f8276b0 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -87,7 +87,7 @@ static int ext2_follow_link(struct inode * dir, struct inode * inode, link = bh->b_data; } else link = (char *) inode->u.ext2_i.i_data; - if (!IS_RDONLY(inode)) { + if (DO_UPDATE_ATIME(inode)) { inode->i_atime = CURRENT_TIME; inode->i_dirt = 1; } @@ -105,7 +105,6 @@ static int ext2_readlink (struct inode * inode, char * buffer, int buflen) struct buffer_head * bh = NULL; char * link; int i, err; - char c; if (!S_ISLNK(inode->i_mode)) { iput (inode); @@ -123,12 +122,13 @@ static int ext2_readlink (struct inode * inode, char * buffer, int buflen) } else link = (char *) inode->u.ext2_i.i_data; + i = 0; - while (i < buflen && (c = link[i])) { + while (i < buflen && link[i]) i++; - put_user (c, buffer++); - } - if (!IS_RDONLY(inode)) { + if (copy_to_user(buffer, link, i)) + i = -EFAULT; + if (DO_UPDATE_ATIME(inode)) { inode->i_atime = CURRENT_TIME; inode->i_dirt = 1; } diff --git a/fs/fat/buffer.c b/fs/fat/buffer.c index eebbf29b5..2a6fc6b74 100644 --- a/fs/fat/buffer.c +++ b/fs/fat/buffer.c @@ -16,13 +16,18 @@ struct buffer_head *fat_bread ( { struct buffer_head *ret = NULL; - /* Note that the blocksize is 512 or 1024, but the first read - is always of size 1024. Doing readahead may be counterproductive + /* Note that the blocksize is 512, 1024 or 2048, but the first read + is always of size 1024 (or 2048). 
Doing readahead may be counterproductive or just plain wrong. */ if (sb->s_blocksize == 512) { ret = bread (sb->s_dev,block,512); } else { - struct buffer_head *real = bread (sb->s_dev,block>>1,1024); + struct buffer_head *real; + if (sb->s_blocksize == 1024){ + real = bread (sb->s_dev,block>>1,1024); + }else{ + real = bread (sb->s_dev,block>>2,2048); + } if (real != NULL){ ret = (struct buffer_head *) @@ -59,7 +64,11 @@ struct buffer_head *fat_bread ( */ memset (ret,0,sizeof(*ret)); ret->b_data = real->b_data; - if (block & 1) ret->b_data += 512; + if (sb->s_blocksize == 2048) { + if (block & 3) ret->b_data += (block & 3) << 9; + }else{ + if (block & 1) ret->b_data += 512; + } ret->b_next = real; }else{ brelse (real); diff --git a/fs/fat/cache.c b/fs/fat/cache.c index af79ce25e..62ff8af1e 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -277,7 +277,7 @@ int fat_free(struct inode *inode,int skip) } if (last) fat_access(inode->i_sb,last,MSDOS_SB(inode->i_sb)->fat_bits == - 12 ? 0xff8 : 0xfff8); + 12 ? EOF_FAT12 : EOF_FAT16); else { MSDOS_I(inode)->i_start = 0; inode->i_dirt = 1; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 6938b7b9e..45b31836b 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -40,7 +40,7 @@ struct file_operations fat_dir_operations = { fat_dir_read, /* read */ NULL, /* write - bad */ fat_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ fat_dir_ioctl, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/fat/fatfs_syms.c b/fs/fat/fatfs_syms.c index 6318549cc..6a10cb4af 100644 --- a/fs/fat/fatfs_syms.c +++ b/fs/fat/fatfs_syms.c @@ -4,6 +4,7 @@ * Exported kernel symbols for the low-level FAT-based fs support. 
* */ +#include <linux/config.h> #include <linux/module.h> #include <linux/mm.h> @@ -14,45 +15,40 @@ extern struct file_operations fat_dir_operations; -static struct symbol_table fat_syms = { -#include <linux/symtab_begin.h> - X(fat_a2alias), - X(fat_a2uni), - X(fat_add_cluster), - X(fat_bmap), - X(fat_brelse), - X(fat_cache_inval_inode), - X(fat_code2uni), - X(fat_date_unix2dos), - X(fat_dir_operations), - X(fat_file_read), - X(fat_file_write), - X(fat_fs_panic), - X(fat_get_entry), - X(fat_lock_creation), - X(fat_mark_buffer_dirty), - X(fat_mmap), - X(fat_notify_change), - X(fat_parent_ino), - X(fat_put_inode), - X(fat_put_super), - X(fat_read_inode), - X(fat_read_super), - X(fat_readdirx), - X(fat_readdir), - X(fat_scan), - X(fat_smap), - X(fat_statfs), - X(fat_truncate), - X(fat_uni2asc_pg), - X(fat_uni2code), - X(fat_unlock_creation), - X(fat_write_inode), -#include <linux/symtab_end.h> -}; +EXPORT_SYMBOL(fat_a2alias); +EXPORT_SYMBOL(fat_a2uni); +EXPORT_SYMBOL(fat_add_cluster); +EXPORT_SYMBOL(fat_bmap); +EXPORT_SYMBOL(fat_brelse); +EXPORT_SYMBOL(fat_cache_inval_inode); +EXPORT_SYMBOL(fat_code2uni); +EXPORT_SYMBOL(fat_date_unix2dos); +EXPORT_SYMBOL(fat_dir_operations); +EXPORT_SYMBOL(fat_file_read); +EXPORT_SYMBOL(fat_file_write); +EXPORT_SYMBOL(fat_fs_panic); +EXPORT_SYMBOL(fat_get_entry); +EXPORT_SYMBOL(fat_lock_creation); +EXPORT_SYMBOL(fat_mark_buffer_dirty); +EXPORT_SYMBOL(fat_mmap); +EXPORT_SYMBOL(fat_notify_change); +EXPORT_SYMBOL(fat_parent_ino); +EXPORT_SYMBOL(fat_put_inode); +EXPORT_SYMBOL(fat_put_super); +EXPORT_SYMBOL(fat_read_inode); +EXPORT_SYMBOL(fat_read_super); +EXPORT_SYMBOL(fat_readdirx); +EXPORT_SYMBOL(fat_readdir); +EXPORT_SYMBOL(fat_scan); +EXPORT_SYMBOL(fat_smap); +EXPORT_SYMBOL(fat_statfs); +EXPORT_SYMBOL(fat_truncate); +EXPORT_SYMBOL(fat_uni2asc_pg); +EXPORT_SYMBOL(fat_uni2code); +EXPORT_SYMBOL(fat_unlock_creation); +EXPORT_SYMBOL(fat_write_inode); int init_fat_fs(void) { - return register_symtab(&fat_syms); + return 0; } - diff --git 
a/fs/fat/file.c b/fs/fat/file.c index b9162f7d9..6dec1ba42 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -32,7 +32,7 @@ static struct file_operations fat_file_operations = { fat_file_read, /* read */ fat_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ generic_file_mmap, /* mmap */ NULL, /* no special open is needed */ @@ -71,7 +71,7 @@ static struct file_operations fat_file_operations_1024 = { fat_file_read, /* read */ fat_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ fat_mmap, /* mmap */ NULL, /* no special open is needed */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 311907407..cf14856d1 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -172,8 +172,8 @@ static int parse_options(char *options,int *fat, int *blksize, int *debug, *blksize = simple_strtoul(value,&value,0); if (*value) return 0; - if (*blksize != 512 && *blksize != 1024){ - printk ("MSDOS FS: Invalid blocksize (512 or 1024)\n"); + if (*blksize != 512 && *blksize != 1024 && *blksize != 2048){ + printk ("MSDOS FS: Invalid blocksize (512, 1024 or 2048)\n"); } } else if (!strcmp(this_char,"sys_immutable")) { @@ -205,16 +205,26 @@ struct super_block *fat_read_super(struct super_block *sb,void *data, int silent } } if (!parse_options((char *) data, &fat, &blksize, &debug, &opts) - || (blksize != 512 && blksize != 1024)) { + || (blksize != 512 && blksize != 1024 && blksize != 2048)) + { sb->s_dev = 0; MOD_DEC_USE_COUNT; return NULL; } cache_init(); lock_super(sb); - /* The first read is always 1024 bytes */ - sb->s_blocksize = 1024; - set_blocksize(sb->s_dev, 1024); + if( blksize > 1024 ) + { + /* Force the superblock to a larger size here. 
*/ + sb->s_blocksize = blksize; + set_blocksize(sb->s_dev, blksize); + } + else + { + /* The first read is always 1024 bytes */ + sb->s_blocksize = 1024; + set_blocksize(sb->s_dev, 1024); + } bh = fat_bread(sb, 0); unlock_super(sb); if (bh == NULL || !fat_is_uptodate(sb,bh)) { @@ -285,7 +295,7 @@ struct super_block *fat_read_super(struct super_block *sb,void *data, int silent /* the misfit with buffer cache and cluster */ /* because clusters (DOS) are often aligned */ /* on odd sectors. */ - sb->s_blocksize_bits = blksize == 512 ? 9 : 10; + sb->s_blocksize_bits = blksize == 512 ? 9 : (blksize == 1024 ? 10 : 11); if (error || debug) { /* The MSDOS_CAN_BMAP is obsolete, but left just to remember */ printk("[MS-DOS FS Rel. 12,FAT %d,check=%c,conv=%c," @@ -294,12 +304,12 @@ struct super_block *fat_read_super(struct super_block *sb,void *data, int silent opts.conversion,opts.fs_uid,opts.fs_gid,opts.fs_umask, MSDOS_CAN_BMAP(MSDOS_SB(sb)) ? ",bmap" : ""); printk("[me=0x%x,cs=%d,#f=%d,fs=%d,fl=%d,ds=%d,de=%d,data=%d," - "se=%d,ts=%ld,ls=%d]\n",b->media,MSDOS_SB(sb)->cluster_size, + "se=%d,ts=%d,ls=%d]\n",b->media,MSDOS_SB(sb)->cluster_size, MSDOS_SB(sb)->fats,MSDOS_SB(sb)->fat_start,MSDOS_SB(sb)->fat_length, MSDOS_SB(sb)->dir_start,MSDOS_SB(sb)->dir_entries, MSDOS_SB(sb)->data_start, - CF_LE_W(*(unsigned short *) &b->sectors), - (unsigned long)b->total_sect,logical_sector_size); + CF_LE_W(get_unaligned((unsigned short *) &b->sectors)), + CF_LE_L(b->total_sect),logical_sector_size); printk ("Transaction block size = %d\n",blksize); } if (MSDOS_SB(sb)->clusters+2 > fat_clusters) @@ -451,7 +461,7 @@ void fat_read_inode(struct inode *inode, struct inode_operations *fs_dir_inode_o !is_exec(raw_entry->ext))) ? S_IRUGO|S_IWUGO : S_IRWXUGO) & ~MSDOS_SB(sb)->options.fs_umask) | S_IFREG; - inode->i_op = (sb->s_blocksize == 1024) + inode->i_op = (sb->s_blocksize == 1024 || sb->s_blocksize == 2048) ? 
&fat_file_inode_operations_1024 : &fat_file_inode_operations; MSDOS_I(inode)->i_start = CF_LE_W(raw_entry->start); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 120c522e7..d1e9bc6ca 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -77,7 +77,7 @@ int is_binary(char conversion,char *extension) /* (rename might deadlock before detecting cross-FS moves.) */ static struct wait_queue *creation_wait = NULL; -static creation_lock = 0; +static int creation_lock = 0; void fat_lock_creation(void) @@ -140,7 +140,7 @@ printk("free cluster: %d\n",nr); return -ENOSPC; } fat_access(sb,nr,MSDOS_SB(sb)->fat_bits == 12 ? - 0xff8 : 0xfff8); + EOF_FAT12 : EOF_FAT16); if (MSDOS_SB(sb)->free_clusters != -1) MSDOS_SB(sb)->free_clusters--; unlock_fat(sb); @@ -258,6 +258,9 @@ void fat_date_unix2dos(int unix_date,unsigned short *time, { int day,year,nl_day,month; + if (sys_tz.tz_dsttime) { + unix_date += 3600; + } unix_date -= sys_tz.tz_minuteswest*60; *time = (unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ (((unix_date/3600) % 24) << 11); diff --git a/fs/fcntl.c b/fs/fcntl.c index 99a1638e1..bedc02e89 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -10,6 +10,9 @@ #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/string.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/bitops.h> #include <asm/uaccess.h> @@ -35,20 +38,33 @@ static inline int dupfd(unsigned int fd, unsigned int arg) asmlinkage int sys_dup2(unsigned int oldfd, unsigned int newfd) { + int err = -EBADF; + + lock_kernel(); if (oldfd >= NR_OPEN || !current->files->fd[oldfd]) - return -EBADF; + goto out; + err = newfd; if (newfd == oldfd) - return newfd; + goto out; + err = -EBADF; if (newfd >= NR_OPEN) - return -EBADF; /* following POSIX.1 6.2.1 */ + goto out; /* following POSIX.1 6.2.1 */ sys_close(newfd); - return dupfd(oldfd,newfd); + err = dupfd(oldfd,newfd); +out: + unlock_kernel(); + return err; } asmlinkage int sys_dup(unsigned int fildes) { - return dupfd(fildes,0); 
+ int ret; + + lock_kernel(); + ret = dupfd(fildes,0); + unlock_kernel(); + return ret; } asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) @@ -56,49 +72,61 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) struct file * filp; struct task_struct *p; int task_found = 0; + long err = -EBADF; + lock_kernel(); if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) - return -EBADF; + goto out; + err = 0; switch (cmd) { case F_DUPFD: - return dupfd(fd,arg); + err = dupfd(fd,arg); + break; case F_GETFD: - return FD_ISSET(fd, &current->files->close_on_exec); + err = FD_ISSET(fd, &current->files->close_on_exec); + break; case F_SETFD: if (arg&1) FD_SET(fd, &current->files->close_on_exec); else FD_CLR(fd, &current->files->close_on_exec); - return 0; + break; case F_GETFL: - return filp->f_flags; + err = filp->f_flags; + break; case F_SETFL: /* * In the case of an append-only file, O_APPEND * cannot be cleared */ + err = -EPERM; if (IS_APPEND(filp->f_inode) && !(arg & O_APPEND)) - return -EPERM; + break; + err = 0; if ((arg & FASYNC) && !(filp->f_flags & FASYNC) && filp->f_op->fasync) filp->f_op->fasync(filp->f_inode, filp, 1); if (!(arg & FASYNC) && (filp->f_flags & FASYNC) && filp->f_op->fasync) filp->f_op->fasync(filp->f_inode, filp, 0); - /* required for SunOS emulation */ + /* required for strict SunOS emulation */ if (O_NONBLOCK != O_NDELAY) if (arg & O_NDELAY) arg |= O_NONBLOCK; - filp->f_flags &= ~(O_APPEND | O_NONBLOCK | FASYNC); + filp->f_flags &= ~(O_APPEND | O_NONBLOCK | + O_NDELAY | FASYNC); filp->f_flags |= arg & (O_APPEND | O_NONBLOCK | - FASYNC); - return 0; + O_NDELAY | FASYNC); + break; case F_GETLK: - return fcntl_getlk(fd, (struct flock *) arg); + err = fcntl_getlk(fd, (struct flock *) arg); + break; case F_SETLK: - return fcntl_setlk(fd, cmd, (struct flock *) arg); + err = fcntl_setlk(fd, cmd, (struct flock *) arg); + break; case F_SETLKW: - return fcntl_setlk(fd, cmd, (struct flock *) arg); + err = fcntl_setlk(fd, 
cmd, (struct flock *) arg); + break; case F_GETOWN: /* * XXX If f_owner is a process group, the @@ -107,7 +135,8 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) * current syscall conventions, the only way * to fix this will be in libc. */ - return filp->f_owner; + err = filp->f_owner; + break; case F_SETOWN: /* * Add the security checks - AC. Without @@ -138,29 +167,35 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) if ((p->pid == arg) || (p->pid == -arg) || (p->pgrp == -arg)) { task_found++; + err = -EPERM; if ((p->session != current->session) && (p->uid != current->uid) && (p->euid != current->euid) && !suser()) - return -EPERM; + goto out; break; } } + err = -EINVAL; if ((task_found == 0) && !suser()) - return -EINVAL; + break; fasync_ok: + err = 0; filp->f_owner = arg; if (S_ISSOCK (filp->f_inode->i_mode)) - sock_fcntl (filp, F_SETOWN, arg); - return 0; + err = sock_fcntl (filp, F_SETOWN, arg); + break; default: /* sockets need a few special fcntls. 
*/ if (S_ISSOCK (filp->f_inode->i_mode)) - { - return (sock_fcntl (filp, cmd, arg)); - } - return -EINVAL; + err = sock_fcntl (filp, cmd, arg); + else + err = -EINVAL; + break; } +out: + unlock_kernel(); + return err; } void kill_fasync(struct fasync_struct *fa, int sig) diff --git a/fs/filesystems.c b/fs/filesystems.c index dacda9315..7d5b51ef1 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -10,9 +10,7 @@ #include <linux/fs.h> #include <linux/minix_fs.h> -#include <linux/ext_fs.h> #include <linux/ext2_fs.h> -#include <linux/xia_fs.h> #include <linux/msdos_fs.h> #include <linux/umsdos_fs.h> #include <linux/proc_fs.h> @@ -24,40 +22,46 @@ #include <linux/ncp_fs.h> #include <linux/affs_fs.h> #include <linux/ufs_fs.h> +#include <linux/romfs_fs.h> +#include <linux/auto_fs.h> #include <linux/major.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#ifdef CONFIG_KERNELD +#include <linux/kerneld.h> +#endif extern void device_setup(void); extern void binfmt_setup(void); +extern void free_initmem(void); /* This may be used only once, enforced by 'static int callable' */ asmlinkage int sys_setup(void) { static int callable = 1; + int err = -1; + lock_kernel(); if (!callable) - return -1; + goto out; callable = 0; - + device_setup(); binfmt_setup(); -#ifdef CONFIG_EXT_FS - init_ext_fs(); -#endif - #ifdef CONFIG_EXT2_FS init_ext2_fs(); #endif -#ifdef CONFIG_XIA_FS - init_xiafs_fs(); -#endif - #ifdef CONFIG_MINIX_FS init_minix_fs(); #endif +#ifdef CONFIG_ROMFS_FS + init_romfs_fs(); +#endif + #ifdef CONFIG_UMSDOS_FS init_umsdos_fs(); #endif @@ -78,6 +82,10 @@ asmlinkage int sys_setup(void) init_proc_fs(); #endif +#ifdef CONFIG_LOCKD + nlmxdr_init(); +#endif + #ifdef CONFIG_NFS_FS init_nfs_fs(); #endif @@ -110,6 +118,46 @@ asmlinkage int sys_setup(void) init_ufs_fs(); #endif +#ifdef CONFIG_AUTOFS_FS + init_autofs_fs(); +#endif + mount_root(); - return 0; + + free_initmem(); + + err = 0; +out: + unlock_kernel(); + return err; +} + +#ifndef CONFIG_NFSD +#ifdef 
CONFIG_NFSD_MODULE +int (*do_nfsservctl)(int, void *, void *) = NULL; +#endif +int +asmlinkage sys_nfsservctl(int cmd, void *argp, void *resp) +{ +#ifndef CONFIG_NFSD_MODULE + return -ENOSYS; +#else + int ret = -ENOSYS; + + lock_kernel(); + if (do_nfsservctl) { + ret = do_nfsservctl(cmd, argp, resp); + goto out; + } +#ifdef CONFIG_KERNELD + if (request_module ("nfsd") == 0) { + if (do_nfsservctl) + ret = do_nfsservctl(cmd, argp, resp); + } +#endif /* CONFIG_KERNELD */ +out: + unlock_kernel(); + return ret; +#endif /* CONFIG_NFSD_MODULE */ } +#endif /* CONFIG_NFSD */ diff --git a/fs/hpfs/hpfs_fs.c b/fs/hpfs/hpfs_fs.c index 3d70172f5..5bc73819c 100644 --- a/fs/hpfs/hpfs_fs.c +++ b/fs/hpfs/hpfs_fs.c @@ -23,13 +23,14 @@ #include <linux/locks.h> #include <linux/stat.h> #include <linux/string.h> +#include <linux/init.h> #include <asm/bitops.h> #include <asm/uaccess.h> #include "hpfs.h" #include "hpfs_caps.h" -/* +/* * HPFS is a mixture of 512-byte blocks and 2048-byte blocks. The 2k blocks * are used for directories and bitmaps. For bmap to work, we must run the * file system with 512-byte blocks. The 2k blocks are assembled in buffers @@ -115,7 +116,7 @@ * seen -- in fact noncontiguous files are seldom seen. I think this is * partly the open() call that lets programs specify the length of an * output file when they know it, and partly because HPFS.IFS really is - * very good at resisting fragmentation. + * very good at resisting fragmentation. 
*/ /* notation */ @@ -153,7 +154,7 @@ static const struct file_operations hpfs_file_ops = hpfs_file_read, /* read */ NULL, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ generic_file_mmap, /* mmap */ NULL, /* no special open is needed */ @@ -197,7 +198,7 @@ static const struct file_operations hpfs_dir_ops = hpfs_dir_read, /* read */ NULL, /* write - bad */ hpfs_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ @@ -298,7 +299,7 @@ static inline fnode_secno ino_secno(ino_t ino) } /* - * test for directory's inode number + * test for directory's inode number */ static inline int ino_is_dir(ino_t ino) @@ -606,7 +607,7 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, else return 0; } - else if (!strcmp(p,"nocheck")) + else if (!strcmp(p,"nocheck")) *nocheck=1; else return 1; @@ -1043,7 +1044,7 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno) * Search allocation tree *b for the given file sector number and return * the disk sector number. Buffer *bhp has the tree in it, and can be * reused for subtrees when access to *b is no longer needed. - * *bhp is busy on entry and exit. + * *bhp is busy on entry and exit. */ static secno bplus_lookup(struct inode *inode, struct bplus_header *b, @@ -1342,7 +1343,7 @@ static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, * 0 => . -1 => .. 1 1.1 ... 8.9 9 => files -2 => eof * * The directory inode caches one position-to-dnode correspondence so - * we won't have to repeatedly scan the top levels of the tree. + * we won't have to repeatedly scan the top levels of the tree. 
*/ /* @@ -1427,7 +1428,7 @@ static int hpfs_readdir(struct inode *inode, struct file *filp, void * dirent, /* * Map the dir entry at subtree coordinates given by *posp, and - * increment *posp to point to the following dir entry. + * increment *posp to point to the following dir entry. */ static struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, @@ -1748,19 +1749,17 @@ static struct file_system_type hpfs_fs_type = { hpfs_read_super, "hpfs", 1, NULL }; -int init_hpfs_fs(void) +__initfunc(int init_hpfs_fs(void)) { return register_filesystem(&hpfs_fs_type); } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - - if ((status = init_hpfs_fs()) == 0) - register_symtab(0); - return status; + return init_hpfs_fs(); } void cleanup_module(void) @@ -1769,4 +1768,3 @@ void cleanup_module(void) } #endif - diff --git a/fs/inode.c b/fs/inode.c index 724e8c4cd..b1d9bda4e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1,229 +1,234 @@ /* - * linux/fs/inode.c + * linux/fs/inode.c: Keeping track of inodes. * - * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1997 David S. Miller */ -#include <linux/stat.h> -#include <linux/sched.h> #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/string.h> -#include <asm/system.h> - -#define NR_IHASH 512 - -/* - * Be VERY careful when you access the inode hash table. There - * are some rather scary race conditions you need to take care of: - * - P1 tries to open file "xx", calls "iget()" with the proper - * inode number, but blocks because it's not on the list. - * - P2 deletes file "xx", gets the inode (which P1 has just read, - * but P1 hasn't woken up to the fact yet) - * - P2 iput()'s the inode, which now has i_nlink = 0 - * - P1 wakes up and has the inode, but now P2 has made that - * inode invalid (but P1 has no way of knowing that). 
- * - * The "updating" counter makes sure that when P1 blocks on the - * iget(), P2 can't delete the inode from under it because P2 - * will wait until P1 has been able to update the inode usage - * count so that the inode will stay in use until everybody has - * closed it.. - */ -static struct inode_hash_entry { - struct inode * inode; - int updating; -} hash_table[NR_IHASH]; - -static struct inode * first_inode; -static struct wait_queue * inode_wait = NULL; -/* Keep these next two contiguous in memory for sysctl.c */ int nr_inodes = 0, nr_free_inodes = 0; int max_inodes = NR_INODE; -static inline int const hashfn(kdev_t dev, unsigned int i) -{ - return (HASHDEV(dev) ^ i) % NR_IHASH; -} +#define INODE_HASHSZ 1024 -static inline struct inode_hash_entry * const hash(kdev_t dev, int i) -{ - return hash_table + hashfn(dev, i); -} +static struct inode *inode_hash[INODE_HASHSZ]; -static inline void insert_inode_free(struct inode *inode) -{ - struct inode * prev, * next = first_inode; +/* All the details of hashing and lookup. 
*/ +#define hashfn(dev, i) ((HASHDEV(dev) + ((i) ^ ((i) >> 10))) & (INODE_HASHSZ - 1)) - first_inode = inode; - prev = next->i_prev; - inode->i_next = next; - inode->i_prev = prev; - prev->i_next = inode; - next->i_prev = inode; +__inline__ void insert_inode_hash(struct inode *inode) +{ + struct inode **htable = &inode_hash[hashfn(inode->i_dev, inode->i_ino)]; + if((inode->i_hash_next = *htable) != NULL) + (*htable)->i_hash_pprev = &inode->i_hash_next; + *htable = inode; + inode->i_hash_pprev = htable; } -static inline void remove_inode_free(struct inode *inode) +#define hash_inode(inode) insert_inode_hash(inode) + +static inline void unhash_inode(struct inode *inode) { - if (first_inode == inode) - first_inode = first_inode->i_next; - if (inode->i_next) - inode->i_next->i_prev = inode->i_prev; - if (inode->i_prev) - inode->i_prev->i_next = inode->i_next; - inode->i_next = inode->i_prev = NULL; + if(inode->i_hash_pprev) { + if(inode->i_hash_next) + inode->i_hash_next->i_hash_pprev = inode->i_hash_pprev; + *(inode->i_hash_pprev) = inode->i_hash_next; + inode->i_hash_pprev = NULL; + } } -void insert_inode_hash(struct inode *inode) +static inline struct inode *find_inode(unsigned int hashent, + kdev_t dev, unsigned long ino) { - struct inode_hash_entry *h; - h = hash(inode->i_dev, inode->i_ino); + struct inode *inode; - inode->i_hash_next = h->inode; - inode->i_hash_prev = NULL; - if (inode->i_hash_next) - inode->i_hash_next->i_hash_prev = inode; - h->inode = inode; + for(inode = inode_hash[hashent]; inode; inode = inode->i_hash_next) + if(inode->i_dev == dev && inode->i_ino == ino) + break; + return inode; } -static inline void remove_inode_hash(struct inode *inode) -{ - struct inode_hash_entry *h; - h = hash(inode->i_dev, inode->i_ino); +/* Free list queue and management. 
*/ +static struct free_inode_queue { + struct inode *head; + struct inode **last; +} free_inodes = { NULL, &free_inodes.head }; - if (h->inode == inode) - h->inode = inode->i_hash_next; - if (inode->i_hash_next) - inode->i_hash_next->i_hash_prev = inode->i_hash_prev; - if (inode->i_hash_prev) - inode->i_hash_prev->i_hash_next = inode->i_hash_next; - inode->i_hash_prev = inode->i_hash_next = NULL; +static inline void put_inode_head(struct inode *inode) +{ + if((inode->i_next = free_inodes.head) != NULL) + free_inodes.head->i_pprev = &inode->i_next; + else + free_inodes.last = &inode->i_next; + free_inodes.head = inode; + inode->i_pprev = &free_inodes.head; + nr_free_inodes++; } -static inline void put_last_free(struct inode *inode) +static inline void put_inode_last(struct inode *inode) { - remove_inode_free(inode); - inode->i_prev = first_inode->i_prev; - inode->i_prev->i_next = inode; - inode->i_next = first_inode; - inode->i_next->i_prev = inode; + inode->i_next = NULL; + inode->i_pprev = free_inodes.last; + *free_inodes.last = inode; + free_inodes.last = &inode->i_next; + nr_free_inodes++; } -int grow_inodes(void) +static inline void remove_free_inode(struct inode *inode) { - struct inode * inode; - int i; - - if (!(inode = (struct inode*) get_free_page(GFP_KERNEL))) - return -ENOMEM; - - i=PAGE_SIZE / sizeof(struct inode); - nr_inodes += i; - nr_free_inodes += i; + if(inode->i_pprev) { + if(inode->i_next) + inode->i_next->i_pprev = inode->i_pprev; + else + free_inodes.last = inode->i_pprev; + *inode->i_pprev = inode->i_next; + inode->i_pprev = NULL; + nr_free_inodes--; + } +} - if (!first_inode) - inode->i_next = inode->i_prev = first_inode = inode++, i--; +/* This is the in-use queue, if i_count > 0 (as far as we can tell) + * the sucker is here. 
+ */ +static struct inode *inuse_list = NULL; - for ( ; i ; i-- ) - insert_inode_free(inode++); - return 0; +static inline void put_inuse(struct inode *inode) +{ + if((inode->i_next = inuse_list) != NULL) + inuse_list->i_pprev = &inode->i_next; + inuse_list = inode; + inode->i_pprev = &inuse_list; } -unsigned long inode_init(unsigned long start, unsigned long end) +static inline void remove_inuse(struct inode *inode) { - memset(hash_table, 0, sizeof(hash_table)); - first_inode = NULL; - return start; + if(inode->i_pprev) { + if(inode->i_next) + inode->i_next->i_pprev = inode->i_pprev; + *inode->i_pprev = inode->i_next; + inode->i_pprev = NULL; + } } +/* Locking and unlocking inodes, plus waiting for locks to clear. */ static void __wait_on_inode(struct inode *); -static inline void wait_on_inode(struct inode * inode) +static inline void wait_on_inode(struct inode *inode) { - if (inode->i_lock) + if(inode->i_lock) __wait_on_inode(inode); } -static inline void lock_inode(struct inode * inode) +static inline void lock_inode(struct inode *inode) { - wait_on_inode(inode); + if(inode->i_lock) + __wait_on_inode(inode); inode->i_lock = 1; } -static inline void unlock_inode(struct inode * inode) +static inline void unlock_inode(struct inode *inode) { inode->i_lock = 0; wake_up(&inode->i_wait); } -/* - * Note that we don't want to disturb any wait-queues when we discard - * an inode. - * - * Argghh. Got bitten by a gcc problem with inlining: no way to tell - * the compiler that the inline asm function 'memset' changes 'inode'. - * I've been searching for the bug for days, and was getting desperate. - * Finally looked at the assembler output... Grrr. - * - * The solution is the weird use of 'volatile'. Ho humm. Have to report - * it to the gcc lists, and hope we can do this more cleanly some day.. 
- */ -void clear_inode(struct inode * inode) +static void __wait_on_inode(struct inode * inode) { - struct wait_queue * wait; + struct wait_queue wait = { current, NULL }; + + add_wait_queue(&inode->i_wait, &wait); +repeat: + current->state = TASK_UNINTERRUPTIBLE; + if (inode->i_lock) { + schedule(); + goto repeat; + } + remove_wait_queue(&inode->i_wait, &wait); + current->state = TASK_RUNNING; +} + +/* Clear an inode of all it's identity, this is exported to the world. */ +void clear_inode(struct inode *inode) +{ + struct wait_queue *wait; + + /* So we don't disappear. */ + inode->i_count++; truncate_inode_pages(inode, 0); wait_on_inode(inode); - if (IS_WRITABLE(inode)) { - if (inode->i_sb && inode->i_sb->dq_op) - inode->i_sb->dq_op->drop(inode); - } - remove_inode_hash(inode); - remove_inode_free(inode); - wait = ((volatile struct inode *) inode)->i_wait; - if (inode->i_count) - nr_free_inodes++; - memset(inode,0,sizeof(*inode)); - ((volatile struct inode *) inode)->i_wait = wait; - insert_inode_free(inode); + if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op) + inode->i_sb->dq_op->drop(inode); + + if(--inode->i_count > 0) + remove_inuse(inode); + else + remove_free_inode(inode); + unhash_inode(inode); + wait = inode->i_wait; + memset(inode, 0, sizeof(*inode)); barrier(); + inode->i_wait = wait; + put_inode_head(inode); /* Pages zapped, put at the front. */ } +/* These check the validity of a mount/umount type operation, we essentially + * check if there are any inodes hanging around which prevent this operation + * from occurring. We also clear out clean inodes referencing this device. + */ int fs_may_mount(kdev_t dev) { - struct inode * inode, * next; - int i; + struct inode *inode; + int pass = 0; - next = first_inode; - for (i = nr_inodes ; i > 0 ; i--) { - inode = next; - next = inode->i_next; /* clear_inode() changes the queues.. 
*/ - if (inode->i_dev != dev) - continue; - if (inode->i_count || inode->i_dirt || inode->i_lock) + inode = free_inodes.head; +repeat: + while(inode) { + struct inode *next = inode->i_next; + if(inode->i_dev != dev) + goto next; + if(inode->i_count || inode->i_dirt || inode->i_lock) return 0; clear_inode(inode); + next: + inode = next; + } + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; } - return 1; + return 1; /* Tis' cool bro. */ } -int fs_may_umount(kdev_t dev, struct inode * mount_root) +int fs_may_umount(kdev_t dev, struct inode *iroot) { - struct inode * inode; - int i; + struct inode *inode; + int pass = 0; - inode = first_inode; - for (i=0 ; i < nr_inodes ; i++, inode = inode->i_next) { - if (inode->i_dev != dev || !inode->i_count) + inode = free_inodes.head; +repeat: + for(; inode; inode = inode->i_next) { + if(inode->i_dev != dev || !inode->i_count) continue; - if (inode == mount_root && inode->i_count == - (inode->i_mount != inode ? 1 : 2)) + if(inode == iroot && + (inode->i_count == (inode->i_mount == inode ? 2 : 1))) continue; return 0; } - return 1; + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; + } + return 1; /* Tis' cool bro. */ } +/* This belongs in file_table.c, not here... */ int fs_may_remount_ro(kdev_t dev) { struct file * file; @@ -237,79 +242,70 @@ int fs_may_remount_ro(kdev_t dev) if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2)) return 0; } - return 1; + return 1; /* Tis' cool bro. */ } -static void write_inode(struct inode * inode) +/* Reading/writing inodes. 
*/ +static void write_inode(struct inode *inode) { - if (!inode->i_dirt) - return; - wait_on_inode(inode); - if (!inode->i_dirt) - return; - if (!inode->i_sb || !inode->i_sb->s_op || !inode->i_sb->s_op->write_inode) { - inode->i_dirt = 0; - return; + if(inode->i_dirt) { + wait_on_inode(inode); + if(inode->i_dirt) { + if(inode->i_sb && + inode->i_sb->s_op && + inode->i_sb->s_op->write_inode) { + inode->i_lock = 1; + inode->i_sb->s_op->write_inode(inode); + unlock_inode(inode); + } else { + inode->i_dirt = 0; + } + } } - inode->i_lock = 1; - inode->i_sb->s_op->write_inode(inode); - unlock_inode(inode); } -static inline void read_inode(struct inode * inode) +static inline void read_inode(struct inode *inode) { - lock_inode(inode); - if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode) + if(inode->i_sb && + inode->i_sb->s_op && + inode->i_sb->s_op->read_inode) { + lock_inode(inode); inode->i_sb->s_op->read_inode(inode); - unlock_inode(inode); + unlock_inode(inode); + } } -/* POSIX UID/GID verification for setting inode attributes */ int inode_change_ok(struct inode *inode, struct iattr *attr) { - /* - * If force is set do it anyway. 
- */ - - if (attr->ia_valid & ATTR_FORCE) - return 0; + if(!(attr->ia_valid & ATTR_FORCE)) { + unsigned short fsuid = current->fsuid; + uid_t iuid = inode->i_uid; + int not_fsuser = !fsuser(); - /* Make sure a caller can chown */ - if ((attr->ia_valid & ATTR_UID) && - (current->fsuid != inode->i_uid || - attr->ia_uid != inode->i_uid) && !fsuser()) - return -EPERM; + if(((attr->ia_valid & ATTR_UID) && + ((fsuid != iuid) || + (attr->ia_uid != iuid)) && not_fsuser) || - /* Make sure caller can chgrp */ - if ((attr->ia_valid & ATTR_GID) && - (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid) && - !fsuser()) - return -EPERM; + ((attr->ia_valid & ATTR_GID) && + (!in_group_p(attr->ia_gid) && + (attr->ia_gid != inode->i_gid))) || - /* Make sure a caller can chmod */ - if (attr->ia_valid & ATTR_MODE) { - if ((current->fsuid != inode->i_uid) && !fsuser()) + ((attr->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)) && + (fsuid != iuid) && not_fsuser)) return -EPERM; - /* Also check the setgid bit! */ - if (!fsuser() && !in_group_p((attr->ia_valid & ATTR_GID) ? attr->ia_gid : - inode->i_gid)) - attr->ia_mode &= ~S_ISGID; - } - /* Check for setting the inode time */ - if ((attr->ia_valid & ATTR_ATIME_SET) && - ((current->fsuid != inode->i_uid) && !fsuser())) - return -EPERM; - if ((attr->ia_valid & ATTR_MTIME_SET) && - ((current->fsuid != inode->i_uid) && !fsuser())) - return -EPERM; + if(attr->ia_valid & ATTR_MODE) { + gid_t grp; + if(fsuid != iuid && not_fsuser) + return -EPERM; + grp = attr->ia_valid & ATTR_GID ? attr->ia_gid : inode->i_gid; + if(not_fsuser && !in_group_p(grp)) + attr->ia_mode &= ~S_ISGID; + } + } return 0; } -/* - * Set the appropriate attributes from an attribute structure into - * the inode structure. 
- */ void inode_setattr(struct inode *inode, struct iattr *attr) { if (attr->ia_valid & ATTR_UID) @@ -332,17 +328,8 @@ void inode_setattr(struct inode *inode, struct iattr *attr) inode->i_dirt = 1; } -/* - * notify_change is called for inode-changing operations such as - * chown, chmod, utime, and truncate. It is guaranteed (unlike - * write_inode) to be called from the context of the user requesting - * the change. - */ - -int notify_change(struct inode * inode, struct iattr *attr) +int notify_change(struct inode *inode, struct iattr *attr) { - int retval; - attr->ia_ctime = CURRENT_TIME; if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME)) { if (!(attr->ia_valid & ATTR_ATIME_SET)) @@ -351,303 +338,320 @@ int notify_change(struct inode * inode, struct iattr *attr) attr->ia_mtime = attr->ia_ctime; } - if (inode->i_sb && inode->i_sb->s_op && + if (inode->i_sb && + inode->i_sb->s_op && inode->i_sb->s_op->notify_change) return inode->i_sb->s_op->notify_change(inode, attr); - if ((retval = inode_change_ok(inode, attr)) != 0) - return retval; + if(inode_change_ok(inode, attr) != 0) + return -EPERM; inode_setattr(inode, attr); return 0; } -/* - * bmap is needed for demand-loading and paging: if this function - * doesn't exist for a filesystem, then those things are impossible: - * executables cannot be run from the filesystem etc... 
- * - * This isn't as bad as it sounds: the read-routines might still work, - * so the filesystem would be otherwise ok (for example, you might have - * a DOS filesystem, which doesn't lend itself to bmap very well, but - * you could still transfer files to/from the filesystem) - */ -int bmap(struct inode * inode, int block) +int bmap(struct inode *inode, int block) { - if (inode->i_op && inode->i_op->bmap) - return inode->i_op->bmap(inode,block); + if(inode->i_op && inode->i_op->bmap) + return inode->i_op->bmap(inode, block); return 0; } void invalidate_inodes(kdev_t dev) { - struct inode * inode, * next; - int i; + struct inode *inode; + int pass = 0; - next = first_inode; - for(i = nr_inodes ; i > 0 ; i--) { - inode = next; - next = inode->i_next; /* clear_inode() changes the queues.. */ - if (inode->i_dev != dev) - continue; - if (inode->i_count || inode->i_dirt || inode->i_lock) { - printk("VFS: inode busy on removed device %s\n", - kdevname(dev)); - continue; - } + inode = free_inodes.head; +repeat: + while(inode) { + struct inode *next = inode->i_next; + if(inode->i_dev != dev) + goto next; clear_inode(inode); + next: + inode = next; + } + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; } } void sync_inodes(kdev_t dev) { - int i; - struct inode * inode; + struct inode *inode; + int pass = 0; - inode = first_inode; - for(i = 0; i < nr_inodes*2; i++, inode = inode->i_next) { - if (dev && inode->i_dev != dev) - continue; + inode = free_inodes.head; +repeat: + while(inode) { + struct inode *next = inode->i_next; + if(dev && inode->i_dev != dev) + goto next; wait_on_inode(inode); - if (inode->i_dirt) - write_inode(inode); + write_inode(inode); + next: + inode = next; + } + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; } } -void iput(struct inode * inode) +static struct wait_queue *inode_wait, *update_wait; + +void iput(struct inode *inode) { - if (!inode) + if(!inode) return; wait_on_inode(inode); - if (!inode->i_count) { - 
printk("VFS: iput: trying to free free inode\n"); - printk("VFS: device %s, inode %lu, mode=0%07o\n", - kdevname(inode->i_rdev), inode->i_ino, (int) inode->i_mode); + if(!inode->i_count) { + printk("VFS: Freeing free inode, tell DaveM\n"); return; } - if (inode->i_pipe) + if(inode->i_pipe) wake_up_interruptible(&PIPE_WAIT(*inode)); -repeat: - if (inode->i_count>1) { +we_slept: + if(inode->i_count > 1) { inode->i_count--; - return; + } else { + wake_up(&inode_wait); + if(inode->i_pipe) { + free_page((unsigned long)PIPE_BASE(*inode)); + PIPE_BASE(*inode) = NULL; + } + if(inode->i_sb && + inode->i_sb->s_op && + inode->i_sb->s_op->put_inode) { + inode->i_sb->s_op->put_inode(inode); + if(!inode->i_nlink) + return; + } + if(inode->i_dirt) { + write_inode(inode); + wait_on_inode(inode); + goto we_slept; + } + if(IS_WRITABLE(inode) && + inode->i_sb && + inode->i_sb->dq_op) { + inode->i_lock = 1; + inode->i_sb->dq_op->drop(inode); + unlock_inode(inode); + goto we_slept; + } + /* There is a serious race leading to here, watch out. */ + if(--inode->i_count == 0) { + remove_inuse(inode); + put_inode_last(inode); /* Place at end of LRU free queue */ + } } +} - wake_up(&inode_wait); - if (inode->i_pipe) { - unsigned long page = (unsigned long) PIPE_BASE(*inode); - PIPE_BASE(*inode) = NULL; - free_page(page); - } +static kmem_cache_t *inode_cachep; - if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->put_inode) { - inode->i_sb->s_op->put_inode(inode); - if (!inode->i_nlink) - return; - } +static void grow_inodes(void) +{ + int i = 16; - if (inode->i_dirt) { - write_inode(inode); /* we can sleep - so do again */ - wait_on_inode(inode); - goto repeat; + while(i--) { + struct inode *inode; + + inode = kmem_cache_alloc(inode_cachep, SLAB_KERNEL); + if(!inode) + return; + memset(inode, 0, sizeof(*inode)); + put_inode_head(inode); + nr_inodes++; } +} - if (IS_WRITABLE(inode)) { - if (inode->i_sb && inode->i_sb->dq_op) { - /* Here we can sleep also. 
Let's do it again - * Dmitry Gorodchanin 02/11/96 - */ - inode->i_lock = 1; - inode->i_sb->dq_op->drop(inode); - unlock_inode(inode); - goto repeat; - } +/* We have to be really careful, it's really easy to run yourself into + * inefficient sequences of events. The first problem is that when you + * steal a non-referenced inode you run the risk of zaping a considerable + * number of page cache entries, which might get refernced once again. + * But if you are growing the inode set to quickly, you suck up ram + * and cause other problems. + * + * We approach the problem in the following way, we take two things into + * consideration. Firstly we take a look at how much we have "committed" + * to this inode already (i_nrpages), this accounts for the cost of getting + * those pages back if someone should reference that inode soon. We also + * attempt to factor in i_blocks, which says "how much of a problem could + * this potentially be". It still needs some tuning though. -DaveM + */ +#define BLOCK_FACTOR_SHIFT 5 /* It is not factored in as much. 
*/ +static struct inode *find_best_candidate_weighted(struct inode *inode) +{ + struct inode *best = NULL; + + if(inode) { + unsigned long bestscore = 1000; + int limit = nr_free_inodes >> 2; + do { + if(!(inode->i_lock | inode->i_dirt)) { + int myscore = inode->i_nrpages; + + myscore += (inode->i_blocks >> BLOCK_FACTOR_SHIFT); + if(myscore < bestscore) { + bestscore = myscore; + best = inode; + } + } + inode = inode->i_next; + } while(inode && --limit); } - - inode->i_count--; + return best; +} - if (inode->i_mmap) { - printk("iput: inode %lu on device %s still has mappings.\n", - inode->i_ino, kdevname(inode->i_dev)); - inode->i_mmap = NULL; +static inline struct inode *find_best_free(struct inode *inode) +{ + if(inode) { + int limit = nr_free_inodes >> 5; + do { + if(!inode->i_nrpages) + return inode; + inode = inode->i_next; + } while(inode && --limit); } - - nr_free_inodes++; - return; + return NULL; } -struct inode * get_empty_inode(void) +struct inode *get_empty_inode(void) { static int ino = 0; - struct inode * inode, * best; - unsigned long badness; - int i; + struct inode *inode; - if (nr_inodes < max_inodes && nr_free_inodes < (nr_inodes >> 1)) - grow_inodes(); repeat: - inode = first_inode; - best = NULL; - badness = 1000; - for (i = nr_inodes/2; i > 0; i--,inode = inode->i_next) { - if (!inode->i_count) { - unsigned long i = 999; - if (!(inode->i_lock | inode->i_dirt)) - i = inode->i_nrpages; - if (i < badness) { - best = inode; - if (!i) - goto found_good; - badness = i; - } - } - } - if (nr_inodes < max_inodes) { - if (grow_inodes() == 0) - goto repeat; - best = NULL; - } - if (!best) { - printk("VFS: No free inodes - contact Linus\n"); - sleep_on(&inode_wait); + inode = find_best_free(free_inodes.head); + if(!inode) + goto pressure; +got_it: + inode->i_count++; + truncate_inode_pages(inode, 0); + wait_on_inode(inode); + if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op) + inode->i_sb->dq_op->drop(inode); + unhash_inode(inode); + 
remove_free_inode(inode); + + memset(inode, 0, sizeof(*inode)); + inode->i_count = 1; + inode->i_nlink = 1; + inode->i_version = ++event; + sema_init(&inode->i_sem, 1); + inode->i_ino = ++ino; + inode->i_dev = 0; + put_inuse(inode); + return inode; +pressure: + if(nr_inodes < max_inodes) { + grow_inodes(); goto repeat; } - if (best->i_lock) { - wait_on_inode(best); + inode = find_best_candidate_weighted(free_inodes.head); + if(!inode) { + printk("VFS: No free inodes, contact DaveM\n"); + sleep_on(&inode_wait); goto repeat; } - if (best->i_dirt) { - write_inode(best); + if(inode->i_lock) { + wait_on_inode(inode); goto repeat; - } - if (best->i_count) + } else if(inode->i_dirt) { + write_inode(inode); goto repeat; -found_good: - clear_inode(best); - best->i_count = 1; - best->i_nlink = 1; - best->i_version = ++event; - best->i_sem.count = 1; - best->i_ino = ++ino; - best->i_dev = 0; - nr_free_inodes--; - if (nr_free_inodes < 0) { - printk ("VFS: get_empty_inode: bad free inode count.\n"); - nr_free_inodes = 0; } - return best; + goto got_it; } -struct inode * get_pipe_inode(void) +struct inode *get_pipe_inode(void) { - struct inode * inode; extern struct inode_operations pipe_inode_operations; - - if (!(inode = get_empty_inode())) - return NULL; - if (!(PIPE_BASE(*inode) = (char*) __get_free_page(GFP_USER))) { - iput(inode); - return NULL; + struct inode *inode = get_empty_inode(); + + if(inode) { + unsigned long page = __get_free_page(GFP_USER); + if(!page) { + iput(inode); + inode = NULL; + } else { + PIPE_BASE(*inode) = (char *) page; + inode->i_op = &pipe_inode_operations; + inode->i_count = 2; + PIPE_WAIT(*inode) = NULL; + PIPE_START(*inode) = PIPE_LEN(*inode) = 0; + PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + PIPE_LOCK(*inode) = 0; + inode->i_pipe = 1; + inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime 
= inode->i_ctime = CURRENT_TIME; + inode->i_blksize = PAGE_SIZE; + } } - inode->i_op = &pipe_inode_operations; - inode->i_count = 2; /* sum of readers/writers */ - PIPE_WAIT(*inode) = NULL; - PIPE_START(*inode) = PIPE_LEN(*inode) = 0; - PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; - PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; - PIPE_LOCK(*inode) = 0; - inode->i_pipe = 1; - inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blksize = PAGE_SIZE; return inode; } -struct inode *__iget(struct super_block * sb, int nr, int crossmntp) -{ - static struct wait_queue * update_wait = NULL; - struct inode_hash_entry * h; - struct inode * inode; - struct inode * empty = NULL; +static int inode_updating[INODE_HASHSZ]; - if (!sb) - panic("VFS: iget with sb==NULL"); - h = hash(sb->s_dev, nr); -repeat: - for (inode = h->inode; inode ; inode = inode->i_hash_next) - if (inode->i_dev == sb->s_dev && inode->i_ino == nr) - goto found_it; - if (!empty) { - /* - * If we sleep here before we have found an inode - * we need to make sure nobody does anything bad - * to the inode while we sleep, because otherwise - * we may return an inode that is not valid any - * more when we wake up.. - */ - h->updating++; - empty = get_empty_inode(); - if (!--h->updating) - wake_up(&update_wait); - if (empty) - goto repeat; - return (NULL); - } - inode = empty; - inode->i_sb = sb; - inode->i_dev = sb->s_dev; - inode->i_ino = nr; - inode->i_flags = sb->s_flags; - put_last_free(inode); - insert_inode_hash(inode); - read_inode(inode); - goto return_it; - -found_it: - if (!inode->i_count) - nr_free_inodes--; - inode->i_count++; - wait_on_inode(inode); - if (inode->i_dev != sb->s_dev || inode->i_ino != nr) { - printk("Whee.. inode changed from under us. 
Tell Linus\n"); - iput(inode); - goto repeat; - } - if (crossmntp && inode->i_mount) { - struct inode * tmp = inode->i_mount; - tmp->i_count++; - iput(inode); - inode = tmp; +struct inode *__iget(struct super_block *sb, int nr, int crossmntp) +{ + unsigned int hashent = hashfn(sb->s_dev, nr); + struct inode *inode, *empty = NULL; + +we_slept: + if((inode = find_inode(hashent, sb->s_dev, nr)) == NULL) { + if(empty == NULL) { + inode_updating[hashent]++; + empty = get_empty_inode(); + if(!--inode_updating[hashent]) + wake_up(&update_wait); + goto we_slept; + } + inode = empty; + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_ino = nr; + inode->i_flags = sb->s_flags; + hash_inode(inode); + read_inode(inode); + } else { + if(!inode->i_count++) { + remove_free_inode(inode); + put_inuse(inode); + } wait_on_inode(inode); + if(crossmntp && inode->i_mount) { + struct inode *mp = inode->i_mount; + mp->i_count++; + iput(inode); + wait_on_inode(inode = mp); + } + if(empty) + iput(empty); } - if (empty) - iput(empty); - -return_it: - while (h->updating) + while(inode_updating[hashent]) sleep_on(&update_wait); return inode; } -/* - * The "new" scheduling primitives (new as of 0.97 or so) allow this to - * be done without disabling interrupts (other than in the actual queue - * updating things: only a couple of 386 instructions). This should be - * much better for interrupt latency. 
- */ -static void __wait_on_inode(struct inode * inode) +void inode_init(void) { - struct wait_queue wait = { current, NULL }; + int i; - add_wait_queue(&inode->i_wait, &wait); -repeat: - current->state = TASK_UNINTERRUPTIBLE; - if (inode->i_lock) { - schedule(); - goto repeat; - } - remove_wait_queue(&inode->i_wait, &wait); - current->state = TASK_RUNNING; + inode_cachep = kmem_cache_create("inode", sizeof(struct inode), + sizeof(unsigned long) * 4, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!inode_cachep) + panic("Cannot create inode SLAB cache\n"); + + for(i = 0; i < INODE_HASHSZ; i++) + inode_hash[i] = NULL; } diff --git a/fs/ioctl.c b/fs/ioctl.c index aca3e287a..6766506a8 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -10,6 +10,8 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/termios.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <linux/fcntl.h> /* for f_flags values */ #include <asm/uaccess.h> @@ -47,45 +49,56 @@ static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg) asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { struct file * filp; - int on, error; + unsigned int flag; + int on, error = -EBADF; + lock_kernel(); if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) - return -EBADF; + goto out; + error = 0; switch (cmd) { case FIOCLEX: FD_SET(fd, ¤t->files->close_on_exec); - return 0; + break; case FIONCLEX: FD_CLR(fd, ¤t->files->close_on_exec); - return 0; + break; case FIONBIO: if ((error = get_user(on, (int *)arg)) != 0) - return error; + break; + flag = O_NONBLOCK; +#ifdef __sparc__ + /* SunOS compatability item. */ + if(O_NONBLOCK != O_NDELAY) + flag |= O_NDELAY; +#endif if (on) - filp->f_flags |= O_NONBLOCK; + filp->f_flags |= flag; else - filp->f_flags &= ~O_NONBLOCK; - return 0; + filp->f_flags &= ~flag; + break; case FIOASYNC: /* O_SYNC is not yet implemented, but it's here for completeness. 
*/ if ((error = get_user(on, (int *)arg)) != 0) - return error; + break; if (on) filp->f_flags |= O_SYNC; else filp->f_flags &= ~O_SYNC; - return 0; + break; default: if (filp->f_inode && S_ISREG(filp->f_inode->i_mode)) - return file_ioctl(filp, cmd, arg); - - if (filp->f_op && filp->f_op->ioctl) - return filp->f_op->ioctl(filp->f_inode, filp, cmd, arg); - - return -ENOTTY; + error = file_ioctl(filp, cmd, arg); + else if (filp->f_op && filp->f_op->ioctl) + error = filp->f_op->ioctl(filp->f_inode, filp, cmd, arg); + else + error = -ENOTTY; } +out: + unlock_kernel(); + return error; } diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index ea193d7b4..7a4943ede 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -31,7 +31,7 @@ static struct file_operations isofs_dir_operations = NULL, /* read */ NULL, /* write - bad */ isofs_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* no special open code */ NULL, /* no special release code */ @@ -131,28 +131,12 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, return 0; while (filp->f_pos < inode->i_size) { - int de_len, next_offset; + int de_len; #ifdef DEBUG printk("Block, offset, f_pos: %x %x %x\n", block, offset, filp->f_pos); printk("inode->i_size = %x\n",inode->i_size); #endif - /* Next directory_record on next CDROM sector */ - if (offset >= bufsize) { -#ifdef DEBUG - printk("offset >= bufsize\n"); -#endif - brelse(bh); - offset = 0; - block = isofs_bmap(inode, (filp->f_pos) >> bufbits); - if (!block) - return 0; - bh = breada(inode->i_dev, block, bufsize, filp->f_pos, inode->i_size); - if (!bh) - return 0; - continue; - } - de = (struct iso_directory_record *) (bh->b_data + offset); inode_number = (block << bufbits) + (offset & (bufsize - 1)); @@ -166,11 +150,16 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, CDROM sector. If we are at the end of the directory, we kick out of the while loop. 
*/ - if (de_len == 0) { + if ((de_len == 0) || (offset == bufsize) ) { brelse(bh); filp->f_pos = ((filp->f_pos & ~(ISOFS_BLOCK_SIZE - 1)) + ISOFS_BLOCK_SIZE); offset = 0; + if( filp->f_pos >= inode->i_size ) + { + return 0; + } + block = isofs_bmap(inode, (filp->f_pos) >> bufbits); if (!block) return 0; @@ -180,40 +169,18 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, continue; } - /* Make sure that the entire directory record is in the - current bh block. - If not, put the two halves together in "tmpde" */ - next_offset = offset + de_len; - if (next_offset > bufsize) { -#ifdef DEBUG - printk("next_offset (%x) > bufsize (%x)\n",next_offset,bufsize); -#endif - next_offset &= (bufsize - 1); - memcpy(tmpde, de, bufsize - offset); - brelse(bh); - block = isofs_bmap(inode, (filp->f_pos + de_len) >> bufbits); - if (!block) - { - return 0; - } - - bh = breada(inode->i_dev, block, bufsize, - filp->f_pos, - inode->i_size); - if (!bh) - { -#ifdef DEBUG - printk("!bh block=%ld, bufsize=%ld\n",block,bufsize); - printk("filp->f_pos = %ld\n",filp->f_pos); - printk("inode->i_size = %ld\n", inode->i_size); -#endif - return 0; - } - - memcpy(bufsize - offset + (char *) tmpde, bh->b_data, next_offset); - de = tmpde; + offset += de_len; + if (offset > bufsize) { + /* + * This would only normally happen if we had + * a buggy cdrom image. All directory + * entries should terminate with a null size + * or end exactly at the end of the sector. + */ + printk("next_offset (%x) > bufsize (%lx)\n", + offset,bufsize); + break; } - offset = next_offset; /* Handle the case of the '.' 
directory */ if (de->name_len[0] == 1 && de->name[0] == 0) { diff --git a/fs/isofs/file.c b/fs/isofs/file.c index 0d5c1ba5c..d14a558a0 100644 --- a/fs/isofs/file.c +++ b/fs/isofs/file.c @@ -27,7 +27,7 @@ static struct file_operations isofs_file_operations = { generic_file_read, /* read */ NULL, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ generic_file_mmap, /* mmap */ NULL, /* no special open is needed */ diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 086872bf2..8d9ce9d96 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1,6 +1,6 @@ /* * linux/fs/isofs/inode.c - * + * * (C) 1991 Linus Torvalds - minix filesystem * 1992, 1993, 1994 Eric Youngdale Modified for ISO9660 filesystem. * 1994 Eberhard Moenkeberg - multi session handling. @@ -8,6 +8,7 @@ * */ +#include <linux/config.h> #include <linux/module.h> #include <linux/stat.h> @@ -21,6 +22,7 @@ #include <linux/malloc.h> #include <linux/errno.h> #include <linux/cdrom.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -41,7 +43,7 @@ void isofs_put_super(struct super_block *sb) lock_super(sb); #ifdef LEAK_CHECK - printk("Outstanding mallocs:%d, outstanding buffers: %d\n", + printk("Outstanding mallocs:%d, outstanding buffers: %d\n", check_malloc, check_bread); #endif sb->s_dev = 0; @@ -50,7 +52,7 @@ void isofs_put_super(struct super_block *sb) return; } -static struct super_operations isofs_sops = { +static struct super_operations isofs_sops = { isofs_read_inode, NULL, /* notify_change */ NULL, /* write_inode */ @@ -85,7 +87,10 @@ static int parse_options(char *options, struct iso9660_options * popt) popt->check = 's'; /* default: strict */ popt->conversion = 'b'; /* default: no conversion */ popt->blocksize = 1024; - popt->mode = S_IRUGO; + popt->mode = S_IRUGO | S_IXUGO; /* r-x for all. The disc could + be shared with DOS machines so + virtually anything could be + a valid executable. 
*/ popt->gid = 0; popt->uid = 0; if (!options) return 1; @@ -127,7 +132,7 @@ static int parse_options(char *options, struct iso9660_options * popt) else if (!strcmp(value,"auto")) popt->conversion = 'a'; else return 0; } - else if (value && + else if (value && (!strcmp(this_char,"block") || !strcmp(this_char,"mode") || !strcmp(this_char,"uid") || @@ -143,8 +148,8 @@ static int parse_options(char *options, struct iso9660_options * popt) if (*vpnt) return 0; switch(*this_char) { case 'b': - if ( ivalue != 512 - && ivalue != 1024 + if ( ivalue != 512 + && ivalue != 1024 && ivalue != 2048) return 0; popt->blocksize = ivalue; break; @@ -177,7 +182,7 @@ static int parse_options(char *options, struct iso9660_options * popt) * * A broken CDwriter software or drive firmware does not set new standards, * at least not if conflicting with the existing ones. - * + * * emoenke@gwdg.de */ #define WE_OBEY_THE_WRITTEN_STANDARDS 1 @@ -193,6 +198,11 @@ static unsigned int isofs_get_last_session(kdev_t dev) vol_desc_start=0; if (get_blkfops(MAJOR(dev))->ioctl!=NULL) { + /* Whoops. We must save the old FS, since otherwise + * we would destroy the kernels idea about FS on root + * mount in read_super... 
[chexum] + */ + unsigned long old_fs=get_fs(); inode_fake.i_rdev=dev; ms_info.addr_format=CDROM_LBA; set_fs(KERNEL_DS); @@ -200,8 +210,8 @@ static unsigned int isofs_get_last_session(kdev_t dev) NULL, CDROMMULTISESSION, (unsigned long) &ms_info); - set_fs(USER_DS); -#if 0 + set_fs(old_fs); +#if 0 printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i); if (i==0) { @@ -221,23 +231,22 @@ static unsigned int isofs_get_last_session(kdev_t dev) struct super_block *isofs_read_super(struct super_block *s,void *data, int silent) { - struct buffer_head *bh=NULL; - int iso_blknum; - unsigned int blocksize_bits; - int high_sierra; - kdev_t dev = s->s_dev; - unsigned int vol_desc_start; - int orig_zonesize; - - struct iso_volume_descriptor *vdp; - struct hs_volume_descriptor *hdp; + struct buffer_head * bh = NULL; + unsigned int blocksize; + unsigned int blocksize_bits; + kdev_t dev = s->s_dev; + struct hs_volume_descriptor * hdp; + struct hs_primary_descriptor * h_pri = NULL; + int high_sierra; + int iso_blknum; + struct iso9660_options opt; + int orig_zonesize; + struct iso_primary_descriptor * pri = NULL; + struct iso_directory_record * rootp; + struct iso_volume_descriptor * vdp; + unsigned int vol_desc_start; - struct iso_primary_descriptor *pri = NULL; - struct hs_primary_descriptor *h_pri = NULL; - struct iso_directory_record *rootp; - - struct iso9660_options opt; MOD_INC_USE_COUNT; @@ -258,7 +267,24 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, printk("gid = %d\n", opt.gid); printk("uid = %d\n", opt.uid); #endif - + + /* + * First of all, get the hardware blocksize for this device. + * If we don't know what it is, or the hardware blocksize is + * larger than the blocksize the user specified, then use + * that value. + */ + blocksize = get_hardblocksize(dev); + if( (blocksize != 0) + && (blocksize > opt.blocksize) ) + { + /* + * Force the blocksize we are going to use to be the + * hardware blocksize. 
+ */ + opt.blocksize = blocksize; + } + blocksize_bits = 0; { int i = opt.blocksize; @@ -267,6 +293,7 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, i >>=1; } } + set_blocksize(dev, opt.blocksize); lock_super(s); @@ -274,7 +301,7 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, s->u.isofs_sb.s_high_sierra = high_sierra = 0; /* default is iso9660 */ vol_desc_start = isofs_get_last_session(dev); - + for (iso_blknum = vol_desc_start+16; iso_blknum < vol_desc_start+100; iso_blknum++) { int b = iso_blknum << (ISOFS_BLOCK_BITS-blocksize_bits); @@ -292,26 +319,26 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, vdp = (struct iso_volume_descriptor *)bh->b_data; hdp = (struct hs_volume_descriptor *)bh->b_data; - + if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { if (isonum_711 (hdp->type) != ISO_VD_PRIMARY) goto out; if (isonum_711 (hdp->type) == ISO_VD_END) goto out; - + s->u.isofs_sb.s_high_sierra = 1; high_sierra = 1; opt.rock = 'n'; h_pri = (struct hs_primary_descriptor *)vdp; break; } - + if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { if (isonum_711 (vdp->type) != ISO_VD_PRIMARY) goto out; if (isonum_711 (vdp->type) == ISO_VD_END) goto out; - + pri = (struct iso_primary_descriptor *)vdp; break; } @@ -326,7 +353,7 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, MOD_DEC_USE_COUNT; return NULL; } - + if(high_sierra){ rootp = (struct iso_directory_record *) h_pri->root_directory_record; #ifndef IGNORE_WRONG_MULTI_VOLUME_SPECS @@ -350,12 +377,30 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, s->u.isofs_sb.s_log_zone_size = isonum_723 (pri->logical_block_size); s->u.isofs_sb.s_max_size = isonum_733(pri->volume_space_size); } - + s->u.isofs_sb.s_ninodes = 0; /* No way to figure this out easily */ - + /* RDE: convert log zone size to bit shift */ orig_zonesize = s -> u.isofs_sb.s_log_zone_size; + + /* + * If the zone 
size is smaller than the hardware sector size, + * this is a fatal error. This would occur if the + * disc drive had sectors that were 2048 bytes, but the filesystem + * had blocks that were 512 bytes (which should only very rarely + * happen. + */ + if( (blocksize != 0) + && (orig_zonesize < blocksize) ) + { + printk("Logical zone size(%d) < hardware blocksize(%u)\n", + orig_zonesize, blocksize); + goto out; + + } + + switch (s -> u.isofs_sb.s_log_zone_size) { case 512: s -> u.isofs_sb.s_log_zone_size = 9; break; case 1024: s -> u.isofs_sb.s_log_zone_size = 10; break; @@ -368,22 +413,22 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, /* RDE: data zone now byte offset! */ - s->u.isofs_sb.s_firstdatazone = ((isonum_733 (rootp->extent) + + s->u.isofs_sb.s_firstdatazone = ((isonum_733 (rootp->extent) + isonum_711 (rootp->ext_attr_length)) << s -> u.isofs_sb.s_log_zone_size); s->s_magic = ISOFS_SUPER_MAGIC; - + /* The CDROM is read-only, has no nodes (devices) on it, and since all of the files appear to be owned by root, we really do not want to allow suid. (suid or devices will not show up unless we have Rock Ridge extensions) */ - + s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; - + brelse(bh); - + printk(KERN_DEBUG "Max size:%ld Log zone size:%ld\n", - s->u.isofs_sb.s_max_size, + s->u.isofs_sb.s_max_size, 1UL << s->u.isofs_sb.s_log_zone_size); printk(KERN_DEBUG "First datazone:%ld Root inode number %d\n", s->u.isofs_sb.s_firstdatazone >> s -> u.isofs_sb.s_log_zone_size, @@ -392,13 +437,25 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, if(high_sierra) printk(KERN_DEBUG "Disc in High Sierra format.\n"); unlock_super(s); /* set up enough so that it can read an inode */ - + /* * Force the blocksize to 512 for 512 byte sectors. The file * read primitives really get it wrong in a bad way if we don't * do this. 
+ * + * Note - we should never be setting the blocksize to something + * less than the hardware sector size for the device. If we + * do, we would end up having to read larger buffers and split + * out portions to satisfy requests. + * + * Note2- the idea here is that we want to deal with the optimal + * zonesize in the filesystem. If we have it set to something less, + * then we have horrible problems with trying to piece together + * bits of adjacent blocks in order to properly read directory + * entries. By forcing the blocksize in this way, we ensure + * that we will never be required to do this. */ - if( orig_zonesize < opt.blocksize ) + if( orig_zonesize != opt.blocksize ) { opt.blocksize = orig_zonesize; blocksize_bits = 0; @@ -430,7 +487,7 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, s->u.isofs_sb.s_mode = opt.mode & 0777; s->s_blocksize = opt.blocksize; s->s_blocksize_bits = blocksize_bits; - s->s_mounted = iget(s, (isonum_733(rootp->extent) + + s->s_mounted = iget(s, (isonum_733(rootp->extent) + isonum_711(rootp->ext_attr_length)) << s -> u.isofs_sb.s_log_zone_size); unlock_super(s); @@ -476,6 +533,34 @@ int isofs_bmap(struct inode * inode,int block) printk("_isofs_bmap: block<0"); return 0; } + + /* + * If we are beyond the end of this file, don't give out any + * blocks. + */ + if( (block << ISOFS_BUFFER_BITS(inode)) >= inode->i_size ) + { + off_t max_legal_read_offset; + + /* + * If we are *way* beyond the end of the file, print a message. + * Access beyond the end of the file up to the next page boundary + * is normal, however because of the way the page cache works. + * In this case, we just return 0 so that we can properly fill + * the page with useless information without generating any + * I/O errors. 
+ */ + max_legal_read_offset = (inode->i_size + PAGE_SIZE - 1) + & ~(PAGE_SIZE - 1); + if( (block << ISOFS_BUFFER_BITS(inode)) >= max_legal_read_offset ) + { + + printk("_isofs_bmap: block>= EOF(%d, %ld)\n", block, + inode->i_size); + } + return 0; + } + return (inode->u.isofs_i.i_first_extent >> ISOFS_BUFFER_BITS(inode)) + block; } @@ -496,7 +581,6 @@ void isofs_read_inode(struct inode * inode) struct buffer_head * bh; struct iso_directory_record * raw_inode; unsigned char *pnt = NULL; - void *cpnt = NULL; int high_sierra; int block; int volume_seq_no ; @@ -507,36 +591,12 @@ void isofs_read_inode(struct inode * inode) printk("unable to read i-node block"); goto fail; } - + pnt = ((unsigned char *) bh->b_data + (inode->i_ino & (bufsize - 1))); raw_inode = ((struct iso_directory_record *) pnt); high_sierra = inode->i_sb->u.isofs_sb.s_high_sierra; - if ((inode->i_ino & (bufsize - 1)) + *pnt > bufsize){ - int frag1, offset; - - offset = (inode->i_ino & (bufsize - 1)); - frag1 = bufsize - offset; - cpnt = kmalloc(*pnt,GFP_KERNEL); - if (cpnt == NULL) { - printk(KERN_INFO "NoMem ISO inode %lu\n",inode->i_ino); - brelse(bh); - goto fail; - } - memcpy(cpnt, bh->b_data + offset, frag1); - brelse(bh); - if (!(bh = bread(inode->i_dev,++block, bufsize))) { - kfree(cpnt); - printk("unable to read i-node block"); - goto fail; - } - offset += *pnt - bufsize; - memcpy((char *)cpnt+frag1, bh->b_data, offset); - pnt = ((unsigned char *) cpnt); - raw_inode = ((struct iso_directory_record *) pnt); - } - if (raw_inode->flags[-high_sierra] & 2) { inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; inode->i_nlink = 1; /* Set to 1. We know there are 2, but @@ -552,7 +612,7 @@ void isofs_read_inode(struct inode * inode) for(i=0; i< raw_inode->name_len[0]; i++) if(raw_inode->name[i]=='.' 
|| raw_inode->name[i]==';') break; - if(i == raw_inode->name_len[0] || raw_inode->name[i] == ';') + if(i == raw_inode->name_len[0] || raw_inode->name[i] == ';') inode->i_mode |= S_IXUGO; /* execute permission */ } inode->i_uid = inode->i_sb->u.isofs_sb.s_uid; @@ -571,12 +631,12 @@ void isofs_read_inode(struct inode * inode) byte of the file length. Catch this and holler. WARNING: this will make it impossible for a file to be > 16Mb on the CDROM!!!*/ - if(inode->i_sb->u.isofs_sb.s_cruft == 'y' && + if(inode->i_sb->u.isofs_sb.s_cruft == 'y' && inode->i_size & 0xff000000){ /* printk("Illegal format on cdrom. Pester manufacturer.\n"); */ inode->i_size &= 0x00ffffff; } - + if (raw_inode->interleave[0]) { printk("Interleaved files not (yet) supported.\n"); inode->i_size = 0; @@ -598,17 +658,17 @@ void isofs_read_inode(struct inode * inode) #endif #ifdef DEBUG - printk("Get inode %d: %d %d: %d\n",inode->i_ino, block, + printk("Get inode %d: %d %d: %d\n",inode->i_ino, block, ((int)pnt) & 0x3ff, inode->i_size); #endif - - inode->i_mtime = inode->i_atime = inode->i_ctime = + + inode->i_mtime = inode->i_atime = inode->i_ctime = iso_date(raw_inode->date, high_sierra); - inode->u.isofs_i.i_first_extent = (isonum_733 (raw_inode->extent) + + inode->u.isofs_i.i_first_extent = (isonum_733 (raw_inode->extent) + isonum_711 (raw_inode->ext_attr_length)) << inode -> i_sb -> u.isofs_sb.s_log_zone_size; - + inode->u.isofs_i.i_backlink = 0xffffffff; /* Will be used for previous directory */ switch (inode->i_sb->u.isofs_sb.s_conversion){ case 'a': @@ -633,31 +693,31 @@ void isofs_read_inode(struct inode * inode) /* hmm..if we want uid or gid set, override the rock ridge setting */ test_and_set_uid(&inode->i_uid, inode->i_sb->u.isofs_sb.s_uid); } - + #ifdef DEBUG printk("Inode: %x extent: %x\n",inode->i_ino, inode->u.isofs_i.i_first_extent); #endif brelse(bh); - + inode->i_op = NULL; /* get the volume sequence number */ volume_seq_no = isonum_723 (raw_inode->volume_sequence_number) ; - /* + 
/* * Disable checking if we see any volume number other than 0 or 1. * We could use the cruft option, but that has multiple purposes, one * of which is limiting the file size to 16Mb. Thus we silently allow * volume numbers of 0 to go through without complaining. */ - if (inode->i_sb->u.isofs_sb.s_cruft == 'n' && + if (inode->i_sb->u.isofs_sb.s_cruft == 'n' && (volume_seq_no != 0) && (volume_seq_no != 1)) { printk("Warning: defective cdrom (volume sequence number). Enabling \"cruft\" mount option.\n"); inode->i_sb->u.isofs_sb.s_cruft = 'y'; } #ifndef IGNORE_WRONG_MULTI_VOLUME_SPECS - if (inode->i_sb->u.isofs_sb.s_cruft != 'y' && + if (inode->i_sb->u.isofs_sb.s_cruft != 'y' && (volume_seq_no != 0) && (volume_seq_no != 1)) { printk("Multi volume CD somehow got mounted.\n"); } else @@ -676,10 +736,6 @@ void isofs_read_inode(struct inode * inode) else if (S_ISFIFO(inode->i_mode)) init_fifo(inode); } - if (cpnt) { - kfree (cpnt); - cpnt = NULL; - } return; fail: /* With a data error we return this information */ @@ -719,37 +775,35 @@ int isofs_lookup_grandparent(struct inode * parent, int extent) unsigned char bufbits = ISOFS_BUFFER_BITS(parent); unsigned int block,offset; int parent_dir, inode_number; - int old_offset; - void * cpnt = NULL; int result; int directory_size; struct buffer_head * bh; struct iso_directory_record * de; - + offset = 0; block = extent << (ISOFS_ZONE_BITS(parent) - bufbits); if (!(bh = bread(parent->i_dev, block, bufsize))) return -1; - + while (1 == 1) { de = (struct iso_directory_record *) (bh->b_data + offset); - if (*((unsigned char *) de) == 0) + if (*((unsigned char *) de) == 0) { brelse(bh); printk("Directory .. not found\n"); return -1; } - + offset += *((unsigned char *) de); - if (offset >= bufsize) + if (offset >= bufsize) { printk(".. 
Directory not in first block" " of directory.\n"); brelse(bh); return -1; } - - if (de->name_len[0] == 1 && de->name[0] == 1) + + if (de->name_len[0] == 1 && de->name[0] == 1) { parent_dir = find_rock_ridge_relocation(de, parent); directory_size = isonum_733 (de->size); @@ -761,7 +815,7 @@ int isofs_lookup_grandparent(struct inode * parent, int extent) printk("Parent dir:%x\n",parent_dir); #endif /* Now we know the extent where the parent dir starts on. */ - + result = -1; offset = 0; @@ -770,17 +824,17 @@ int isofs_lookup_grandparent(struct inode * parent, int extent) { return -1; } - + for(;;) { de = (struct iso_directory_record *) (bh->b_data + offset); inode_number = (block << bufbits)+(offset & (bufsize - 1)); - + /* If the length byte is zero, we should move on to the next CDROM sector. If we are at the end of the directory, we kick out of the while loop. */ - - if (*((unsigned char *) de) == 0) + + if ((*((unsigned char *) de) == 0) || (offset == bufsize) ) { brelse(bh); offset = 0; @@ -802,64 +856,37 @@ int isofs_lookup_grandparent(struct inode * parent, int extent) } continue; } - + /* Make sure that the entire directory record is in the current bh block. If not, we malloc a buffer, and put the two halves together, so that we can cleanly read the block. 
*/ - old_offset = offset; offset += *((unsigned char *) de); - if (offset >= bufsize) + if (offset > bufsize) { - unsigned int frag1; - frag1 = bufsize - old_offset; - cpnt = kmalloc(*((unsigned char *) de),GFP_KERNEL); - if (!cpnt) return -1; - memcpy(cpnt, bh->b_data + old_offset, frag1); - de = (struct iso_directory_record *) ((char *)cpnt); - brelse(bh); - offset -= bufsize; - directory_size -= bufsize; - if(directory_size < 0) - { - printk("Directory size < 0\n"); - return -1; - } - block++; - if(!(bh = bread(parent->i_dev,block,bufsize))) { - kfree(cpnt); - return -1; - } - memcpy((char *)cpnt+frag1, bh->b_data, offset); + printk("Directory overrun\n"); + goto out; } - + if (find_rock_ridge_relocation(de, parent) == extent){ result = inode_number; goto out; } - - if (cpnt) { - kfree(cpnt); - cpnt = NULL; - } + } /* We go here for any condition we cannot handle. We also drop through to here at the end of the directory. */ out: - if (cpnt) { - kfree(cpnt); - cpnt = NULL; - } brelse(bh); #ifdef DEBUG printk("Resultant Inode %d\n",result); #endif return result; } - + #ifdef LEAK_CHECK #undef malloc #undef free_s @@ -894,19 +921,17 @@ static struct file_system_type iso9660_fs_type = { isofs_read_super, "iso9660", 1, NULL }; -int init_iso9660_fs(void) +__initfunc(int init_iso9660_fs(void)) { return register_filesystem(&iso9660_fs_type); } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - - if ((status = init_iso9660_fs()) == 0) - register_symtab(0); - return status; + return init_iso9660_fs(); } void cleanup_module(void) @@ -915,4 +940,3 @@ void cleanup_module(void) } #endif - diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 8cdadf836..71f816b47 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -98,6 +98,12 @@ static struct buffer_head * isofs_find_entry(struct inode * dir, offset = 0; f_pos = ((f_pos & ~(ISOFS_BLOCK_SIZE - 1)) + ISOFS_BLOCK_SIZE); + + if( f_pos >= dir->i_size ) + { + return 0; + } + block = 
isofs_bmap(dir,f_pos>>bufbits); if (!block || !(bh = bread(dir->i_dev,block,bufsize))) return 0; diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 9f267e69d..6f4539045 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -203,6 +203,17 @@ int get_rock_ridge_filename(struct iso_directory_record * de, break; case SIG('N','M'): if (truncate) break; + /* + * If the flags are 2 or 4, this indicates '.' or '..'. + * We don't want to do anything with this, because it + * screws up the code that calls us. We don't really + * care anyways, since we can just use the non-RR + * name. + */ + if (rr->u.NM.flags & 6) { + break; + } + if (rr->u.NM.flags & ~1) { printk("Unsupported NM flag settings (%d)\n",rr->u.NM.flags); break; @@ -287,7 +298,7 @@ int parse_rock_ridge_inode(struct iso_directory_record * de, CHECK_CE; break; case SIG('E','R'): - printk("ISO9660 Extensions: "); + printk(KERN_DEBUG"ISO9660 Extensions: "); { int p; for(p=0;p<rr->u.ER.len_id;p++) printk("%c",rr->u.ER.data[p]); }; diff --git a/fs/isofs/symlink.c b/fs/isofs/symlink.c index 59489dc4b..87e544324 100644 --- a/fs/isofs/symlink.c +++ b/fs/isofs/symlink.c @@ -83,7 +83,6 @@ static int isofs_readlink(struct inode * inode, char * buffer, int buflen) { char * pnt; int i; - char c; if (!S_ISLNK(inode->i_mode)) { iput(inode); @@ -97,12 +96,12 @@ static int isofs_readlink(struct inode * inode, char * buffer, int buflen) iput(inode); if (!pnt) return 0; - i = 0; - while (i<buflen && (c = pnt[i])) { - i++; - put_user(c,buffer++); - } + i = strlen(pnt)+1; + if (i > buflen) + i = buflen; + if (copy_to_user(buffer, pnt, i)) + i = -EFAULT; kfree(pnt); return i; } diff --git a/fs/isofs/util.c b/fs/isofs/util.c index b080406cd..75183cc8f 100644 --- a/fs/isofs/util.c +++ b/fs/isofs/util.c @@ -84,10 +84,16 @@ isonum_733 (char * p) return (isonum_731 (p)); } -/* We have to convert from a MM/DD/YY format to the unix ctime format. We have to - take into account leap years and all of that good stuff. 
Unfortunately, the kernel - does not have the information on hand to take into account daylight savings time, - so there will be cases (roughly half the time) where the dates are off by one hour. */ +/* + * We have to convert from a MM/DD/YY format to the unix ctime format. + * We have to take into account leap years and all of that good stuff. + * Unfortunately, the kernel does not have the information on hand to + * take into account daylight savings time, but it shouldn't matter. + * The time stored should be localtime (with or without DST in effect), + * and the timezone offset should hold the offset required to get back + * to GMT. Thus we should always be correct. + */ + int iso_date(char * p, int flag) { int year, month, day, hour ,minute, second, tz; @@ -121,9 +127,33 @@ int iso_date(char * p, int flag) if (tz & 0x80) tz |= (-1 << 8); - /* timezone offset is unreliable on some disks */ - if (-48 <= tz && tz <= 52) - crtime += tz * 15 * 60; + /* + * The timezone offset is unreliable on some disks, + * so we make a sanity check. In no case is it ever + * more than 13 hours from GMT, which is 52*15min. + * The time is always stored in localtime with the + * timezone offset being what get added to GMT to + * get to localtime. Thus we need to subtract the offset + * to get to true GMT, which is what we store the time + * as internally. On the local system, the user may set + * their timezone any way they wish, of course, so GMT + * gets converted back to localtime on the receiving + * system. + * + * NOTE: mkisofs in versions prior to mkisofs-1.10 had + * the sign wrong on the timezone offset. This has now + * been corrected there too, but if you are getting screwy + * results this may be the explaination. If enough people + * complain, a user configuration option could be added + * to add the timezone offset in with the wrong sign + * for 'compatibility' with older discs, but I cannot see how + * it will matter that much. 
+ * + * Thanks to kuhlmav@elec.canterbury.ac.nz (Volker Kuhlmann) + * for pointing out the sign error. + */ + if (-52 <= tz && tz <= 52) + crtime -= tz * 15 * 60; } return crtime; } diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile new file mode 100644 index 000000000..7c319ffc3 --- /dev/null +++ b/fs/lockd/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the linux lock manager stuff +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +O_TARGET := lockd.o +O_OBJS := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ + svcproc.o svcsubs.o mon.o xdr.o +OX_OBJS := lockd_syms.o +M_OBJS := $(O_TARGET) + +include $(TOPDIR)/Rules.make diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c new file mode 100644 index 000000000..8c41d2f39 --- /dev/null +++ b/fs/lockd/clntlock.c @@ -0,0 +1,203 @@ +/* + * linux/fs/lockd/clntlock.c + * + * Lock handling for the client side NLM implementation + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#define __KERNEL_SYSCALLS__ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/nfs_fs.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/lockd/lockd.h> + +#define NLMDBG_FACILITY NLMDBG_CIENT + +/* + * Local function prototypes + */ +static int reclaimer(void *ptr); + +/* + * The following functions handle blocking and granting from the + * client perspective. + */ + +/* + * This is the representation of a blocked client lock. 
+ */ +struct nlm_wait { + struct nlm_wait * b_next; /* linked list */ + struct wait_queue * b_wait; /* where to wait on */ + struct nlm_host * b_host; + struct file_lock * b_lock; /* local file lock */ + unsigned short b_reclaim; /* got to reclaim lock */ + u32 b_status; /* grant callback status */ +}; + +static struct nlm_wait * nlm_blocked = NULL; + +/* + * Block on a lock + */ +int +nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) +{ + struct nlm_wait block, **head; + int err; + u32 pstate; + + block.b_host = host; + block.b_lock = fl; + block.b_wait = NULL; + block.b_status = NLM_LCK_BLOCKED; + block.b_next = nlm_blocked; + nlm_blocked = &block; + + /* Remember pseudo nsm state */ + pstate = host->h_state; + + /* Go to sleep waiting for GRANT callback. Some servers seem + * to lose callbacks, however, so we're going to poll from + * time to time just to make sure. + * + * For now, the retry frequency is pretty high; normally + * a 1 minute timeout would do. See the comment before + * nlmclnt_lock for an explanation. + */ + current->timeout = jiffies + 30 * HZ; + interruptible_sleep_on(&block.b_wait); + + for (head = &nlm_blocked; *head; head = &(*head)->b_next) { + if (*head == &block) { + *head = block.b_next; + break; + } + } + + if (!signalled()) { + *statp = block.b_status; + return 0; + } + + /* Okay, we were interrupted. Cancel the pending request + * unless the server has rebooted. + */ + if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0) + printk(KERN_NOTICE + "lockd: CANCEL call failed (errno %d)\n", -err); + + return -ERESTARTSYS; +} + +/* + * The server lockd has called us back to tell us the lock was granted + */ +u32 +nlmclnt_grant(struct nlm_lock *lock) +{ + struct nlm_wait *block; + + /* + * Look up blocked request based on arguments. + * Warning: must not use cookie to match it!
+ */ + for (block = nlm_blocked; block; block = block->b_next) { + if (nlm_compare_locks(block->b_lock, &lock->fl)) + break; + } + + /* Ooops, no blocked request found. */ + if (block == NULL) + return nlm_lck_denied; + + /* Alright, we found the lock. Set the return status and + * wake up the caller. + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + + return nlm_granted; +} + +/* + * The following procedures deal with the recovery of locks after a + * server crash. + */ + +/* + * Reclaim all locks on server host. We do this by spawning a separate + * reclaimer thread. + * FIXME: should bump MOD_USE_COUNT while reclaiming + */ +void +nlmclnt_recovery(struct nlm_host *host, u32 newstate) +{ + if (!host->h_reclaiming++) { + if (host->h_nsmstate == newstate) + return; + printk(KERN_WARNING + "lockd: Uh-oh! Interfering reclaims for host %s", + host->h_name); + host->h_monitored = 0; + host->h_nsmstate = newstate; + host->h_state++; + nlm_release_host(host); + } else { + host->h_monitored = 0; + host->h_nsmstate = newstate; + host->h_state++; + host->h_count++; + kernel_thread(reclaimer, host, 0); + } +} + +static int +reclaimer(void *ptr) +{ + struct nlm_host *host = (struct nlm_host *) ptr; + struct nlm_wait *block; + struct file_lock *fl; + struct inode *inode; + + /* This one ensures that our parent doesn't terminate while the + * reclaim is in progress */ + lockd_up(); + + /* First, reclaim all locks that have been granted previously. 
*/ + do { + for (fl = file_lock_table; fl; fl = fl->fl_next) { + inode = fl->fl_file->f_inode; + if (inode->i_sb->s_magic == NFS_SUPER_MAGIC + && nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) + && fl->fl_u.nfs_fl.state != host->h_state + && (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { + fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; + nlmclnt_reclaim(host, fl); + break; + } + } + } while (fl); + + host->h_reclaiming = 0; + wake_up(&host->h_gracewait); + + /* Now, wake up all processes that sleep on a blocked lock */ + for (block = nlm_blocked; block; block = block->b_next) { + if (block->b_host == host) { + block->b_status = NLM_LCK_DENIED_GRACE_PERIOD; + wake_up(&block->b_wait); + } + } + + /* Release host handle after use */ + nlm_release_host(host); + lockd_down(); + + return 0; +} diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c new file mode 100644 index 000000000..1506a2ba6 --- /dev/null +++ b/fs/lockd/clntproc.c @@ -0,0 +1,540 @@ +/* + * linux/fs/lockd/clntproc.c + * + * RPC procedures for the client side NLM implementation + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/nfs_fs.h> +#include <linux/utsname.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/lockd/lockd.h> +#include <linux/lockd/sm_inter.h> + +#define NLMDBG_FACILITY NLMDBG_CLIENT + +static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); +static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); +static int nlmclnt_unlock(struct nlm_rqst *, struct file_lock *); +static void nlmclnt_unlock_callback(struct rpc_task *); +static void nlmclnt_cancel_callback(struct rpc_task *); +static int nlm_stat_to_errno(u32 stat); + +/* + * Cookie counter for NLM requests + */ +static u32 nlm_cookie = 0x1234; + +/* + * Initialize arguments for TEST/LOCK/UNLOCK/CANCEL calls + */ +static inline void +nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock 
*fl) +{ + struct nlm_args *argp = &req->a_args; + struct nlm_lock *lock = &argp->lock; + + memset(argp, 0, sizeof(*argp)); + argp->cookie = nlm_cookie++; + argp->state = nsm_local_state; + lock->fh = *NFS_FH(fl->fl_file->f_inode); + lock->caller = system_utsname.nodename; + lock->oh.data = req->a_owner; + lock->oh.len = sprintf(req->a_owner, "%d@%s", + current->pid, system_utsname.nodename); + lock->fl = *fl; +} + +/* + * Initialize arguments for GRANTED call + */ +int +nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) +{ + struct nlm_args *argp = &call->a_args; + struct nlm_lock *alock = &argp->lock; + void *data = NULL; + + if (lock->oh.len > NLMCLNT_OHSIZE + && !(data = kmalloc(lock->oh.len, GFP_KERNEL))) + return 0; + + argp->cookie = nlm_cookie++; + argp->lock = *lock; + alock->caller = system_utsname.nodename; + if (data) + alock->oh.data = (u8 *) data; + else + alock->oh.data = call->a_owner; + memcpy(alock->oh.data, lock->oh.data, lock->oh.len); + return 1; +} + +void +nlmclnt_freegrantargs(struct nlm_rqst *call) +{ + kfree(call->a_args.lock.caller); +} + +/* + * This is the main entry point for the NLM client. + */ +int +nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) +{ + struct nfs_server *nfssrv = NFS_SERVER(inode); + struct nlm_host *host; + struct nlm_rqst reqst, *call = &reqst; + unsigned long oldmask; + int status; + + /* Always use NLM version 1 over UDP for now... */ + if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), IPPROTO_UDP, 1))) + return -ENOLCK; + + /* Create RPC client handle if not there, and copy soft + * and intr flags from NFS client. 
*/ + if (host->h_rpcclnt == NULL) { + struct rpc_clnt *clnt; + + /* Bind an rpc client to this host handle (does not + * perform a portmapper lookup) */ + if (!(clnt = nlm_bind_host(host))) { + status = -ENOLCK; + goto done; + } + clnt->cl_softrtry = nfssrv->client->cl_softrtry; + clnt->cl_intr = nfssrv->client->cl_intr; + clnt->cl_chatty = nfssrv->client->cl_chatty; + } + + /* Keep the old signal mask */ + oldmask = current->blocked; + + /* If we're cleaning up locks because the process is exiting, + * perform the RPC call asynchronously. */ + if (cmd == F_SETLK && fl->fl_type == F_UNLCK + && (current->flags & PF_EXITING)) { + current->blocked = ~0UL; /* Mask all signals */ + call = nlmclnt_alloc_call(); + call->a_flags = RPC_TASK_ASYNC; + } else { + call->a_flags = 0; + } + call->a_host = host; + + /* Set up the argument struct */ + nlmclnt_setlockargs(call, fl); + + if (cmd == F_GETLK) { + status = nlmclnt_test(call, fl); + } else if (cmd == F_SETLK && fl->fl_type == F_UNLCK) { + status = nlmclnt_unlock(call, fl); + } else if (cmd == F_SETLK || cmd == F_SETLKW) { + call->a_args.block = (cmd == F_SETLKW)? 1 : 0; + status = nlmclnt_lock(call, fl); + } else { + status = -EINVAL; + } + + if (status < 0 && (call->a_flags & RPC_TASK_ASYNC)) + rpc_free(call); + + current->blocked = oldmask; + +done: + dprintk("lockd: clnt proc returns %d\n", status); + nlm_release_host(host); + return status; +} + +/* + * Wait while server is in grace period + */ +static inline int +nlmclnt_grace_wait(struct nlm_host *host) +{ + if (!host->h_reclaiming) + current->timeout = 10 * HZ; + interruptible_sleep_on(&host->h_gracewait); + return signalled()? 
-ERESTARTSYS : 0; +} + +/* + * Allocate an NLM RPC call struct + */ +struct nlm_rqst * +nlmclnt_alloc_call(void) +{ + struct nlm_rqst *call; + + while (!signalled()) { + call = (struct nlm_rqst *) rpc_allocate(RPC_TASK_ASYNC, + sizeof(struct nlm_rqst)); + if (call) + return call; + current->timeout = 5 * HZ; + current->state = TASK_INTERRUPTIBLE; + schedule(); + } + return NULL; +} + +/* + * Generic NLM call + */ +int +nlmclnt_call(struct nlm_rqst *req, u32 proc) +{ + struct nlm_host *host = req->a_host; + struct rpc_clnt *clnt; + struct nlm_args *argp = &req->a_args; + struct nlm_res *resp = &req->a_res; + int status; + + dprintk("lockd: call procedure %s on %s\n", + nlm_procname(proc), host->h_name); + + do { + if (host->h_reclaiming && !argp->reclaim) { + interruptible_sleep_on(&host->h_gracewait); + continue; + } + + /* If we have no RPC client yet, create one. */ + if ((clnt = nlm_bind_host(host)) == NULL) + return -ENOLCK; + + /* Perform the RPC call. If an error occurs, try again */ + if ((status = rpc_call(clnt, proc, argp, resp, 0)) < 0) { + dprintk("lockd: rpc_call returned error %d\n", -status); + if (status == -ERESTARTSYS) + return status; + nlm_rebind_host(host); + } else + if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) { + dprintk("lockd: server in grace period\n"); + if (argp->reclaim) { + printk(KERN_WARNING + "lockd: spurious grace period reject?!\n"); + return -ENOLCK; + } + } else { + dprintk("lockd: server returns status %d\n", resp->status); + return 0; /* Okay, call complete */ + } + + /* Back off a little and try again */ + current->timeout = jiffies + 15 * HZ; + interruptible_sleep_on(&host->h_gracewait); + } while (!signalled()); + + return -ERESTARTSYS; +} + +/* + * Generic NLM call, async version. 
+ */ +int +nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) +{ + struct nlm_host *host = req->a_host; + struct rpc_clnt *clnt; + struct nlm_args *argp = &req->a_args; + struct nlm_res *resp = &req->a_res; + int status; + + dprintk("lockd: call procedure %s on %s (async)\n", + nlm_procname(proc), host->h_name); + + /* If we have no RPC client yet, create one. */ + if ((clnt = nlm_bind_host(host)) == NULL) + return -ENOLCK; + + /* bootstrap and kick off the async RPC call */ + status = rpc_do_call(clnt, proc, argp, resp, RPC_TASK_ASYNC, + callback, req); + + /* If the async call is proceeding, increment host refcount */ + if (status >= 0 && (req->a_flags & RPC_TASK_ASYNC)) + host->h_count++; + return status; +} + +/* + * TEST for the presence of a conflicting lock + */ +static int +nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) +{ + int status; + + if ((status = nlmclnt_call(req, NLMPROC_TEST)) < 0) + return status; + + status = req->a_res.status; + if (status == NLM_LCK_GRANTED) { + fl->fl_type = F_UNLCK; + } if (status == NLM_LCK_DENIED) { + /* + * Report the conflicting lock back to the application. + * FIXME: Is it OK to report the pid back as well? + */ + memcpy(fl, &req->a_res.lock.fl, sizeof(*fl)); + /* fl->fl_pid = 0; */ + } else { + return nlm_stat_to_errno(req->a_res.status); + } + + return 0; +} + +/* + * LOCK: Try to create a lock + * + * Programmer Harassment Alert + * + * When given a blocking lock request in a sync RPC call, the HPUX lockd + * will faithfully return LCK_BLOCKED but never cares to notify us when + * the lock could be granted. This way, our local process could hang + * around forever waiting for the callback. + * + * Solution A: Implement busy-waiting + * Solution B: Use the async version of the call (NLM_LOCK_{MSG,RES}) + * + * For now I am implementing solution A, because I hate the idea of + * re-implementing lockd for a third time in two months. 
The async + * calls shouldn't be too hard to do, however. + * + * This is one of the lovely things about standards in the NFS area: + * they're so soft and squishy you can't really blame HP for doing this. + */ +static int +nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) +{ + struct nlm_host *host = req->a_host; + struct nlm_res *resp = &req->a_res; + int status; + + if (!host->h_monitored && nsm_monitor(host) < 0) { + printk(KERN_NOTICE "lockd: failed to monitor %s\n", + host->h_name); + return -ENOLCK; + } + + while (1) { + if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { + if (resp->status != NLM_LCK_BLOCKED) + break; + status = nlmclnt_block(host, fl, &resp->status); + } + if (status < 0) + return status; + } + + if (resp->status == NLM_LCK_GRANTED) { + fl->fl_u.nfs_fl.state = host->h_state; + fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED; + } + + return nlm_stat_to_errno(resp->status); +} + +/* + * RECLAIM: Try to reclaim a lock + */ +int +nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) +{ + struct nlm_rqst reqst, *req; + int status; + + req = &reqst; + req->a_host = host; + req->a_flags = 0; + + /* Set up the argument struct */ + nlmclnt_setlockargs(req, fl); + req->a_args.reclaim = 1; + + if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0 + && req->a_res.status == NLM_LCK_GRANTED) + return 0; + + printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d " + "(errno %d, status %d)\n", fl->fl_pid, + status, req->a_res.status); + + /* + * FIXME: This is a serious failure. We can + * + * a. Ignore the problem + * b. Send the owning process some signal (Linux doesn't have + * SIGLOST, though...) + * c. Retry the operation + * + * Until someone comes up with a simple implementation + * for b or c, I'll choose option a. 
+ */ + + return -ENOLCK; +} + +/* + * UNLOCK: remove an existing lock + */ +static int +nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) +{ + struct nlm_res *resp = &req->a_res; + int status; + + /* Clean the GRANTED flag now so the lock doesn't get + * reclaimed while we're stuck in the unlock call. */ + fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED; + + if (req->a_flags & RPC_TASK_ASYNC) { + return nlmclnt_async_call(req, NLMPROC_UNLOCK, + nlmclnt_unlock_callback); + } + + if ((status = nlmclnt_call(req, NLMPROC_UNLOCK)) < 0) + return status; + + if (resp->status == NLM_LCK_GRANTED) + return 0; + + if (resp->status != NLM_LCK_DENIED_NOLOCKS) + printk("lockd: unexpected unlock status: %d\n", resp->status); + + /* What to do now? I'm out of my depth... */ + + return -ENOLCK; +} + +static void +nlmclnt_unlock_callback(struct rpc_task *task) +{ + struct nlm_rqst *req = (struct nlm_rqst *) task->tk_calldata; + int status = req->a_res.status; + + if (RPC_ASSASSINATED(task)) + goto die; + + if (task->tk_status < 0) { + dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); + nlm_rebind_host(req->a_host); + rpc_restart_call(task); + return; + } + if (status != NLM_LCK_GRANTED + && status != NLM_LCK_DENIED_GRACE_PERIOD) { + printk("lockd: unexpected unlock status: %d\n", status); + } + +die: + rpc_release_task(task); +} + +/* + * Cancel a blocked lock request. + * We always use an async RPC call for this in order not to hang a + * process that has been Ctrl-C'ed. 
+ */ +int +nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) +{ + struct nlm_rqst *req; + unsigned long oldmask = current->blocked; + int status; + + /* Block all signals while setting up call */ + current->blocked = ~0UL; + + do { + req = (struct nlm_rqst *) rpc_allocate(RPC_TASK_ASYNC, + sizeof(*req)); + } while (req == NULL); + req->a_host = host; + req->a_flags = RPC_TASK_ASYNC; + + nlmclnt_setlockargs(req, fl); + + status = nlmclnt_async_call(req, NLMPROC_CANCEL, + nlmclnt_cancel_callback); + if (status < 0) + rpc_free(req); + + current->blocked = oldmask; + return status; +} + +static void +nlmclnt_cancel_callback(struct rpc_task *task) +{ + struct nlm_rqst *req = (struct nlm_rqst *) task->tk_calldata; + + if (RPC_ASSASSINATED(task)) + goto die; + + if (task->tk_status < 0) { + dprintk("lockd: CANCEL call error %d, retrying.\n", + task->tk_status); + goto retry_cancel; + } + + dprintk("lockd: cancel status %d (task %d)\n", + req->a_res.status, task->tk_pid); + + switch (req->a_res.status) { + case NLM_LCK_GRANTED: + case NLM_LCK_DENIED_GRACE_PERIOD: + /* Everything's good */ + break; + case NLM_LCK_DENIED_NOLOCKS: + dprintk("lockd: CANCEL failed (server has no locks)\n"); + goto retry_cancel; + default: + printk(KERN_NOTICE "lockd: weird return %d for CANCEL call\n", + req->a_res.status); + } + +die: + rpc_release_task(task); + nlm_release_host(req->a_host); + kfree(req); + return; + +retry_cancel: + nlm_rebind_host(req->a_host); + rpc_restart_call(task); + rpc_delay(task, 30 * HZ); + return; +} + +/* + * Convert an NLM status code to a generic kernel errno + */ +static int +nlm_stat_to_errno(u32 status) +{ + switch(status) { + case NLM_LCK_GRANTED: + return 0; + case NLM_LCK_DENIED: + return -EAGAIN; + case NLM_LCK_DENIED_NOLOCKS: + case NLM_LCK_DENIED_GRACE_PERIOD: + return -ENOLCK; + case NLM_LCK_BLOCKED: + printk(KERN_NOTICE "lockd: unexpected status NLM_BLOCKED\n"); + return -ENOLCK; + } + printk(KERN_NOTICE "lockd: unexpected server status 
%d\n", status); + return -ENOLCK; +} diff --git a/fs/lockd/host.c b/fs/lockd/host.c new file mode 100644 index 000000000..027c230a8 --- /dev/null +++ b/fs/lockd/host.c @@ -0,0 +1,323 @@ +/* + * linux/fs/lockd/host.c + * + * Management for NLM peer hosts. The nlm_host struct is shared + * between client and server implementation. The only reason to + * do so is to reduce code bloat. + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/malloc.h> +#include <linux/in.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/lockd/lockd.h> + + +#define NLMDBG_FACILITY NLMDBG_HOSTCACHE +#define NLM_HOST_MAX 64 +#define NLM_HOST_NRHASH 32 +#define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1)) +#define NLM_PTRHASH(ptr) ((((u32)(unsigned long) ptr) / 32) & (NLM_HOST_NRHASH-1)) +#define NLM_HOST_REBIND (60 * HZ) +#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) +#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) +#define NLM_HOST_ADDR(sv) (&(sv)->s_nlmclnt->cl_xprt->addr) + +static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; +static unsigned long next_gc = 0; +static int nrhosts = 0; +static struct semaphore nlm_host_sema = MUTEX; + + +static void nlm_gc_hosts(void); + +/* + * Find an NLM server handle in the cache. If there is none, create it. + */ +struct nlm_host * +nlmclnt_lookup_host(struct sockaddr_in *sin, int proto, int version) +{ + return nlm_lookup_host(NULL, sin, proto, version); +} + +/* + * Find an NLM client handle in the cache. If there is none, create it. 
+ */ +struct nlm_host * +nlmsvc_lookup_host(struct svc_rqst *rqstp) +{ + return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, 0, 0); +} + +/* + * Match the given host against client/address + */ +static inline int +nlm_match_host(struct nlm_host *host, struct svc_client *clnt, + struct sockaddr_in *sin) +{ + if (clnt) + return host->h_exportent == clnt; + return nlm_cmp_addr(&host->h_addr, sin); +} + +/* + * Common host lookup routine for server & client + */ +struct nlm_host * +nlm_lookup_host(struct svc_client *clnt, struct sockaddr_in *sin, + int proto, int version) +{ + struct nlm_host *host, **hp; + u32 addr; + int hash; + + if (!clnt && !sin) { + printk(KERN_NOTICE "lockd: no clnt or addr in lookup_host!\n"); + return NULL; + } + + dprintk("lockd: nlm_lookup_host(%08x, p=%d, v=%d)\n", + (unsigned)(sin? ntohl(sin->sin_addr.s_addr) : 0), proto, version); + + if (clnt) + hash = NLM_PTRHASH(clnt); + else + hash = NLM_ADDRHASH(sin->sin_addr.s_addr); + + /* Lock hash table */ + down(&nlm_host_sema); + + if (next_gc < jiffies) + nlm_gc_hosts(); + + for (hp = &nlm_hosts[hash]; (host = *hp); hp = &host->h_next) { + if (host->h_version != version || host->h_proto != proto) + continue; + + if (nlm_match_host(host, clnt, sin)) { + if (hp != nlm_hosts + hash) { + *hp = host->h_next; + host->h_next = nlm_hosts[hash]; + nlm_hosts[hash] = host; + } + host->h_expires = jiffies + NLM_HOST_EXPIRE; + host->h_count++; + up(&nlm_host_sema); + return host; + } + } + + /* special hack for nlmsvc_invalidate_client */ + if (sin == NULL) + goto nohost; + + /* Ooops, no host found, create it */ + dprintk("lockd: creating host entry\n"); + + if (!(host = (struct nlm_host *) kmalloc(sizeof(*host), GFP_KERNEL))) + goto nohost; + memset(host, 0, sizeof(*host)); + + addr = sin->sin_addr.s_addr; + sprintf(host->h_name, "%d.%d.%d.%d", + (unsigned char) (ntohl(addr) >> 24), + (unsigned char) (ntohl(addr) >> 16), + (unsigned char) (ntohl(addr) >> 8), + (unsigned char) (ntohl(addr) >> 0)); + 
+ host->h_addr = *sin; + host->h_addr.sin_port = 0; /* ouch! */ + host->h_version = version; + host->h_proto = proto; + host->h_authflavor = RPC_AUTH_NULL; + host->h_rpcclnt = NULL; + host->h_sema = MUTEX; + host->h_nextrebind = jiffies + NLM_HOST_REBIND; + host->h_expires = jiffies + NLM_HOST_EXPIRE; + host->h_count = 1; + host->h_state = 0; /* pseudo NSM state */ + host->h_nsmstate = 0; /* real NSM state */ + host->h_exportent = clnt; + + host->h_next = nlm_hosts[hash]; + nlm_hosts[hash] = host; + + if (++nrhosts > NLM_HOST_MAX) + next_gc = 0; + +nohost: + up(&nlm_host_sema); + return host; +} + +/* + * Create the NLM RPC client for an NLM peer + */ +struct rpc_clnt * +nlm_bind_host(struct nlm_host *host) +{ + struct rpc_clnt *clnt; + struct rpc_xprt *xprt; + + dprintk("lockd: nlm_bind_host(%08x)\n", + (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); + + /* Lock host handle */ + down(&host->h_sema); + + /* If we've already created an RPC client, check whether + * RPC rebind is required */ + if ((clnt = host->h_rpcclnt) != NULL) { + if (host->h_nextrebind < jiffies) { + clnt->cl_port = 0; + host->h_nextrebind = jiffies + NLM_HOST_REBIND; + dprintk("lockd: next rebind in %ld jiffies\n", + host->h_nextrebind - jiffies); + } + } else { + uid_t saved_euid = current->euid; + + /* Create RPC socket as root user so we get a priv port */ + current->euid = 0; + xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL); + current->euid = saved_euid; + if (xprt == NULL) + goto forgetit; + + xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); + + clnt = rpc_create_client(xprt, host->h_name, &nlm_program, + host->h_version, host->h_authflavor); + if (clnt == NULL) { + xprt_destroy(xprt); + goto forgetit; + } + clnt->cl_autobind = 1; /* turn on pmap queries */ + xprt->nocong = 1; /* No congestion control for NLM */ + + host->h_rpcclnt = clnt; + } + + up(&host->h_sema); + return clnt; + +forgetit: + printk("lockd: couldn't create RPC handle for %s\n", host->h_name); + 
up(&host->h_sema); + return NULL; +} + +/* + * Force a portmap lookup of the remote lockd port + */ +void +nlm_rebind_host(struct nlm_host *host) +{ + dprintk("lockd: rebind host %s\n", host->h_name); + if (host->h_rpcclnt && host->h_nextrebind < jiffies) { + host->h_rpcclnt->cl_port = 0; + host->h_nextrebind = jiffies + NLM_HOST_REBIND; + } +} + +/* + * Release NLM host after use + */ +void +nlm_release_host(struct nlm_host *host) +{ + dprintk("lockd: release host %s\n", host->h_name); + host->h_count -= 1; +} + +/* + * Shut down the hosts module. + * Note that this routine is called only at server shutdown time. + */ +void +nlm_shutdown_hosts(void) +{ + struct nlm_host *host; + int i; + + dprintk("lockd: shutting down host module\n"); + down(&nlm_host_sema); + + /* First, make all hosts eligible for gc */ + dprintk("lockd: nuking all hosts...\n"); + for (i = 0; i < NLM_HOST_NRHASH; i++) { + for (host = nlm_hosts[i]; host; host = host->h_next) + host->h_expires = 0; + } + + /* Then, perform a garbage collection pass */ + nlm_gc_hosts(); + up(&nlm_host_sema); + + /* complain if any hosts are left */ + if (nrhosts) { + printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); + dprintk("lockd: %d hosts left:\n", nrhosts); + for (i = 0; i < NLM_HOST_NRHASH; i++) { + for (host = nlm_hosts[i]; host; host = host->h_next) { + dprintk(" %s (cnt %d use %d exp %ld)\n", + host->h_name, host->h_count, + host->h_inuse, host->h_expires); + } + } + } +} + +/* + * Garbage collect any unused NLM hosts. + * This GC combines reference counting for async operations with + * mark & sweep for resources held by remote clients. 
+ */ +static void +nlm_gc_hosts(void) +{ + struct nlm_host **q, *host; + struct rpc_clnt *clnt; + int i; + + dprintk("lockd: host garbage collection\n"); + for (i = 0; i < NLM_HOST_NRHASH; i++) { + for (host = nlm_hosts[i]; host; host = host->h_next) + host->h_inuse = 0; + } + + /* Mark all hosts that hold locks, blocks or shares */ + nlmsvc_mark_resources(); + + for (i = 0; i < NLM_HOST_NRHASH; i++) { + q = &nlm_hosts[i]; + while ((host = *q) != NULL) { + if (host->h_count || host->h_inuse + || host->h_expires >= jiffies) { + q = &host->h_next; + continue; + } + dprintk("lockd: delete host %s\n", host->h_name); + *q = host->h_next; + if ((clnt = host->h_rpcclnt) != NULL) { + if (clnt->cl_users) { + printk(KERN_WARNING + "lockd: active RPC handle\n"); + clnt->cl_dead = 1; + } else { + rpc_destroy_client(host->h_rpcclnt); + } + } + kfree(host); + nrhosts--; + } + } + + next_gc = jiffies + NLM_HOST_COLLECT; +} + diff --git a/fs/lockd/lockd_syms.c b/fs/lockd/lockd_syms.c new file mode 100644 index 000000000..b3990ed98 --- /dev/null +++ b/fs/lockd/lockd_syms.c @@ -0,0 +1,42 @@ +/* + * linux/fs/lockd/lockd_syms.c + * + * Symbols exported by the lockd module. 
+ * + * Authors: Olaf Kirch (okir@monad.swb.de) + * + * Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de> + */ + +#define __NO_VERSION__ +#include <linux/config.h> +#include <linux/module.h> + +#ifdef CONFIG_MODULES + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sched.h> +#include <linux/uio.h> +#include <linux/unistd.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/lockd/lockd.h> + +/* Start/stop the daemon */ +EXPORT_SYMBOL(lockd_up); +EXPORT_SYMBOL(lockd_down); + +/* NFS client entry */ +EXPORT_SYMBOL(nlmclnt_proc); + +/* NFS server entry points/hooks */ +EXPORT_SYMBOL(nlmsvc_invalidate_client); +EXPORT_SYMBOL(nlmsvc_ops); + +/* Configuration at insmod time */ +EXPORT_SYMBOL(nlmsvc_grace_period); +EXPORT_SYMBOL(nlmsvc_timeout); + +#endif /* CONFIG_MODULES */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c new file mode 100644 index 000000000..bcd747e3f --- /dev/null +++ b/fs/lockd/mon.c @@ -0,0 +1,228 @@ +/* + * linux/fs/lockd/mon.c + * + * The kernel statd client. 
+ * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/utsname.h> +#include <linux/kernel.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/lockd/lockd.h> +#include <linux/lockd/sm_inter.h> + + +#define NLMDBG_FACILITY NLMDBG_MONITOR + +static struct rpc_clnt * nsm_create(void); + +extern struct rpc_program nsm_program; + +/* + * Local NSM state + */ +u32 nsm_local_state = 0; + +/* + * Common procedure for SM_MON/SM_UNMON calls + */ +static int +nsm_mon_unmon(struct nlm_host *host, char *what, u32 proc) +{ + struct rpc_clnt *clnt; + struct nsm_args args; + struct nsm_res res; + int status; + + dprintk("lockd: nsm_%s(%s)\n", what, host->h_name); + if (!(clnt = nsm_create())) + return -EACCES; + + args.addr = host->h_addr.sin_addr.s_addr; + args.prog = NLM_PROGRAM; + args.vers = 1; + args.proc = NLMPROC_NSM_NOTIFY; + + if ((status = rpc_call(clnt, proc, &args, &res, 0)) < 0) + return status; + + if (res.status != 0) { + printk(KERN_NOTICE "lockd: cannot %s %s\n", what, host->h_name); + return -EACCES; + } + + nsm_local_state = res.state; + return 0; +} + +/* + * Set up monitoring of a remote host + */ +int +nsm_monitor(struct nlm_host *host) +{ + int status; + + if ((status = nsm_mon_unmon(host, "monitor", SM_MON)) >= 0) + host->h_monitored = 1; + return status; +} + +/* + * Cease to monitor remote host + */ +int +nsm_unmonitor(struct nlm_host *host) +{ + int status; + + if ((status = nsm_mon_unmon(host, "unmonitor", SM_UNMON)) >= 0) + host->h_monitored = 0; + return status; +} + +/* + * Create NSM client for the local host + */ +static struct rpc_clnt * +nsm_create(void) +{ + struct sockaddr_in sin; + struct rpc_xprt *xprt; + struct rpc_clnt *clnt; + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + sin.sin_port = 0; + + if (!(xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL))) + return NULL; + + clnt = rpc_create_client(xprt, "localhost", + 
&nsm_program, SM_VERSION, + RPC_AUTH_NULL); + if (!clnt) { + xprt_destroy(xprt); + } else { + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + clnt->cl_oneshot = 1; + } + return clnt; +} + +/* + * XDR functions for NSM. + */ +static int +xdr_error(struct rpc_rqst *rqstp, u32 *p, void *dummy) +{ + return -EACCES; +} + +static int +xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) +{ + char buffer[20]; + u32 addr = ntohl(argp->addr); + + dprintk("nsm: xdr_encode_mon(%08lx, %ld, %ld, %ld)\n", + htonl(argp->addr), htonl(argp->proc), + htonl(argp->vers), htonl(argp->proc)); + + /* + * Use the dotted-quad IP address of the remote host as + * identifier. Linux statd always looks up the canonical + * hostname first for whatever remote hostname it receives, + * so this works alright. + */ + sprintf(buffer, "%d.%d.%d.%d", (addr>>24) & 0xff, (addr>>16) & 0xff, + (addr>>8) & 0xff, (addr) & 0xff); + if (!(p = xdr_encode_string(p, buffer)) + || !(p = xdr_encode_string(p, system_utsname.nodename))) + return -EIO; + *p++ = htonl(argp->prog); + *p++ = htonl(argp->vers); + *p++ = htonl(argp->proc); + + /* This is the private part. 
Needed only for SM_MON call */ + if (rqstp->rq_task->tk_proc == SM_MON) { + *p++ = argp->addr; + *p++ = 0; + *p++ = 0; + *p++ = 0; + } + + rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); + return 0; +} + +static int +xdr_decode_stat_res(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp) +{ + resp->status = ntohl(*p++); + resp->state = ntohl(*p++); + dprintk("nsm: xdr_decode_stat_res status %d state %d\n", + resp->status, resp->state); + return 0; +} + +static int +xdr_decode_stat(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp) +{ + resp->status = ntohl(*p++); + return 0; +} + +#define SM_my_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN)) +#define SM_my_id_sz (3+1+SM_my_name_sz) +#define SM_mon_id_sz (1+XDR_QUADLEN(20)+SM_my_id_sz) +#define SM_mon_sz (SM_mon_id_sz+4) + +static struct rpc_procinfo nsm_procedures[] = { + { "sm_null", + (kxdrproc_t) xdr_error, + (kxdrproc_t) xdr_error, 0, 0 }, + { "sm_stat", + (kxdrproc_t) xdr_error, + (kxdrproc_t) xdr_error, 0, 0 }, + { "sm_mon", + (kxdrproc_t) xdr_encode_mon, + (kxdrproc_t) xdr_decode_stat_res, SM_mon_sz, 2 }, + { "sm_unmon", + (kxdrproc_t) xdr_encode_mon, + (kxdrproc_t) xdr_decode_stat, SM_mon_id_sz, 1 }, + { "sm_unmon_all", + (kxdrproc_t) xdr_error, + (kxdrproc_t) xdr_error, 0, 0 }, + { "sm_simu_crash", + (kxdrproc_t) xdr_error, + (kxdrproc_t) xdr_error, 0, 0 }, + { "sm_notify", + (kxdrproc_t) xdr_error, + (kxdrproc_t) xdr_error, 0, 0 }, +}; + +static struct rpc_version nsm_version1 = { + 1, + sizeof(nsm_procedures)/sizeof(nsm_procedures[0]), + nsm_procedures +}; + +static struct rpc_version * nsm_version[] = { + NULL, + &nsm_version1, +}; + +static struct rpc_stat nsm_stats; + +struct rpc_program nsm_program = { + "statd", + SM_PROGRAM, + sizeof(nsm_version)/sizeof(nsm_version[0]), + nsm_version, + &nsm_stats +}; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c new file mode 100644 index 000000000..fc133b51e --- /dev/null +++ b/fs/lockd/svc.c @@ -0,0 +1,284 @@ +/* + * linux/fs/lockd/svc.c + * + * This is the 
central lockd service. + * + * FIXME: Separate the lockd NFS server functionality from the lockd NFS + * client functionality. Oh why didn't Sun create two separate + * services in the first place? + * + * Authors: Olaf Kirch (okir@monad.swb.de) + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#define __KERNEL_SYSCALLS__ +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/nfs.h> +#include <linux/in.h> +#include <linux/uio.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <linux/malloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> + +#include <linux/sunrpc/types.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svcsock.h> +#include <linux/lockd/lockd.h> + + +#define NLMDBG_FACILITY NLMDBG_SVC +#define LOCKD_BUFSIZE (1024 + NLMSSVC_XDRSIZE) +#define BLOCKABLE_SIGS (~(_S(SIGKILL) | _S(SIGSTOP))) +#define _S(sig) (1 << ((sig) - 1)) + +extern struct svc_program nlmsvc_program; +struct nlmsvc_binding * nlmsvc_ops = NULL; +static int nlmsvc_sema = 0; +static int nlmsvc_pid = 0; +unsigned long nlmsvc_grace_period = 0; +unsigned long nlmsvc_timeout = 0; + +/* + * Currently the following can be set only at insmod time. + * Ideally, they would be accessible through the sysctl interface. 
+ */ +unsigned long nlm_grace_period = 0; +unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; + +/* + * This is the lockd kernel thread + */ +static void +lockd(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + sigset_t oldsigmask; + int err = 0; + + lock_kernel(); + /* Lock module and set up kernel thread */ + MOD_INC_USE_COUNT; + /* exit_files(current); */ + exit_mm(current); + current->session = 1; + current->pgrp = 1; + sprintf(current->comm, "lockd"); + + /* kick rpciod */ + rpciod_up(); + + dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); + + if (!nlm_timeout) + nlm_timeout = LOCKD_DFLT_TIMEO; + +#ifdef RPC_DEBUG + nlmsvc_grace_period = 10 * HZ; +#else + if (nlm_grace_period) { + nlmsvc_grace_period += (1 + nlm_grace_period / nlm_timeout) + * nlm_timeout * HZ; + } else { + nlmsvc_grace_period += 5 * nlm_timeout * HZ; + } +#endif + + nlmsvc_grace_period += jiffies; + nlmsvc_timeout = nlm_timeout * HZ; + nlmsvc_pid = current->pid; + + /* + * The main request loop. We don't terminate until the last + * NFS mount or NFS daemon has gone away, and we've been sent a + * signal. + */ + while (nlmsvc_sema || !signalled()) { + if (signalled()) + current->signal = 0; + + /* + * Retry any blocked locks that have been notified by + * the VFS. Don't do this during grace period. + * (Theoretically, there shouldn't even be blocked locks + * during grace period). + */ + if (!nlmsvc_grace_period) { + current->timeout = nlmsvc_retry_blocked(); + } else if (nlmsvc_grace_period < jiffies) + nlmsvc_grace_period = 0; + + /* + * Find a socket with data available and call its + * recvfrom routine. + */ + if ((err = svc_recv(serv, rqstp)) == -EAGAIN) + continue; + if (err < 0) { + if (err != -EINTR) + printk(KERN_WARNING + "lockd: terminating on error %d\n", + -err); + break; + } + + dprintk("lockd: request from %08x\n", + (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr)); + + /* + * Look up the NFS client handle. 
The handle is needed for + * all but the GRANTED callback RPCs. + */ + if (nlmsvc_ops) { + nlmsvc_ops->exp_readlock(); + rqstp->rq_client = + nlmsvc_ops->exp_getclient(&rqstp->rq_addr); + } else { + rqstp->rq_client = NULL; + } + + /* Process request with all signals blocked. */ + oldsigmask = current->blocked; + current->blocked = BLOCKABLE_SIGS; + svc_process(serv, rqstp); + current->blocked = oldsigmask; + + /* Unlock export hash tables */ + if (nlmsvc_ops) + nlmsvc_ops->exp_unlock(); + } + + nlm_shutdown_hosts(); + + /* Exit the RPC thread */ + svc_exit_thread(rqstp); + + /* release rpciod */ + rpciod_down(); + + /* Release module */ + MOD_DEC_USE_COUNT; + nlmsvc_pid = 0; +} + +/* + * Make a socket for lockd + * FIXME: Move this to net/sunrpc/svc.c so that we can share this with nfsd. + */ +static int +lockd_makesock(struct svc_serv *serv, int protocol, unsigned short port) +{ + struct sockaddr_in sin; + + dprintk("lockd: creating socket proto = %d\n", protocol); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = INADDR_ANY; + sin.sin_port = htons(port); + return svc_create_socket(serv, protocol, &sin); +} + +int +lockd_up(void) +{ + struct svc_serv * serv; + int error; + + if (nlmsvc_pid || nlmsvc_sema++) + return 0; + + dprintk("lockd: creating service\n"); + if ((serv = svc_create(&nlmsvc_program, 0, NLMSVC_XDRSIZE)) == NULL) + return -ENOMEM; + + if ((error = lockd_makesock(serv, IPPROTO_UDP, 0)) < 0 + || (error = lockd_makesock(serv, IPPROTO_TCP, 0)) < 0) { + svc_destroy(serv); + return error; + } + + if ((error = svc_create_thread(lockd, serv)) < 0) + nlmsvc_sema--; + + /* Release server */ + svc_destroy(serv); + return 0; +} + +void +lockd_down(void) +{ + if (!nlmsvc_pid || --nlmsvc_sema > 0) + return; + + kill_proc(nlmsvc_pid, SIGKILL, 1); + nlmsvc_sema = 0; + nlmsvc_pid = 0; +} + +#ifdef MODULE +/* New module support in 2.1.18 */ +#if LINUX_VERSION_CODE >= 0x020112 + EXPORT_NO_SYMBOLS; + MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); + 
MODULE_DESCRIPTION("NFS file locking service version " LOCKD_VERSION "."); + MODULE_PARM(nlm_grace_period, "10-240l"); + MODULE_PARM(nlm_timeout, "3-20l"); +#endif +int +init_module(void) +{ + nlmxdr_init(); + return 0; +} + +void +cleanup_module(void) +{ + /* FIXME: delete all NLM clients */ + nlm_shutdown_hosts(); +} +#endif + +/* + * Define NLM program and procedures + */ +static struct svc_version nlmsvc_version1 = { + 1, 16, nlmsvc_procedures, NULL +}; +static struct svc_version nlmsvc_version3 = { + 3, 24, nlmsvc_procedures, NULL +}; +#ifdef CONFIG_NFSD_NFS3 +static struct svc_version nlmsvc_version4 = { + 4, 24, nlmsvc_procedures4, NULL +}; +#endif +static struct svc_version * nlmsvc_version[] = { + NULL, + &nlmsvc_version1, + NULL, + &nlmsvc_version3, +#ifdef CONFIG_NFSD_NFS3 + &nlmsvc_version4, +#endif +}; + +static struct svc_stat nlmsvc_stats; + +#define NLM_NRVERS (sizeof(nlmsvc_version)/sizeof(nlmsvc_version[0])) +struct svc_program nlmsvc_program = { + NLM_PROGRAM, /* program number */ + 1, NLM_NRVERS-1, /* version range */ + NLM_NRVERS, /* number of entries in nlmsvc_version */ + nlmsvc_version, /* version table */ + "lockd", /* service name */ + &nlmsvc_stats, /* stats table */ +}; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c new file mode 100644 index 000000000..b4d74f745 --- /dev/null +++ b/fs/lockd/svclock.c @@ -0,0 +1,620 @@ +/* + * linux/fs/lockd/svclock.c + * + * Handling of server-side locks, mostly of the blocked variety. + * This is the ugliest part of lockd because we tread on very thin ice. + * GRANT and CANCEL calls may get stuck, meet in mid-flight, etc. + * IMNSHO introducing the grant callback into the NLM protocol was one + * of the worst ideas Sun ever had. Except maybe for the idea of doing + * NFS file locking at all. + * + * I'm trying hard to avoid race conditions by protecting most accesses + * to a file's list of blocked locks through a semaphore. 
The global + * list of blocked locks is not protected in this fashion however. + * Therefore, some functions (such as the RPC callback for the async grant + * call) move blocked locks towards the head of the list *while some other + * process might be traversing it*. This should not be a problem in + * practice, because this will only cause functions traversing the list + * to visit some blocks twice. + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/lockd/nlm.h> +#include <linux/lockd/lockd.h> + + +#define NLMDBG_FACILITY NLMDBG_SVCLOCK + +static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); +static int nlmsvc_remove_block(struct nlm_block *block); +static void nlmsvc_grant_callback(struct rpc_task *task); +static void nlmsvc_notify_blocked(struct file_lock *); + +/* + * The list of blocked locks to retry + */ +static struct nlm_block * nlm_blocked = NULL; + +/* + * Insert a blocked lock into the global list + */ +static void +nlmsvc_insert_block(struct nlm_block *block, unsigned long when) +{ + struct nlm_block **bp, *b; + + dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); + if (block->b_queued) + nlmsvc_remove_block(block); + for (bp = &nlm_blocked; (b = *bp); bp = &b->b_next) + if (when < b->b_when) + break; + + block->b_queued = 1; + block->b_when = when; + block->b_next = b; + *bp = block; +} + +/* + * Remove a block from the global list + */ +static int +nlmsvc_remove_block(struct nlm_block *block) +{ + struct nlm_block **bp, *b; + + if (!block->b_queued) + return 1; + for (bp = &nlm_blocked; (b = *bp); bp = &b->b_next) { + if (b == block) { + *bp = block->b_next; + block->b_queued = 0; + return 1; + } + } + + return 0; +} + +/* + * Find a block for a given lock and optionally remove it from + * the list. 
+ */ +static struct nlm_block * +nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove) +{ + struct nlm_block **head, *block; + struct file_lock *fl; + + dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %ld-%ld ty=%d\n", + file, lock->fl.fl_pid, lock->fl.fl_start, + lock->fl.fl_end, lock->fl.fl_type); + for (head = &nlm_blocked; (block = *head); head = &block->b_next) { + fl = &block->b_call.a_args.lock.fl; + dprintk(" check f=%p pd=%d %ld-%ld ty=%d\n", + block->b_file, fl->fl_pid, fl->fl_start, + fl->fl_end, fl->fl_type); + if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) { + if (remove) + *head = block->b_next; + return block; + } + } + + return NULL; +} + +/* + * Find a block with a given NLM cookie. + */ +static inline struct nlm_block * +nlmsvc_find_block(u32 cookie) +{ + struct nlm_block *block; + + for (block = nlm_blocked; block; block = block->b_next) { + if (block->b_call.a_args.cookie == cookie) + break; + } + + return block; +} + +/* + * Create a block and initialize it. + * + * Note: we explicitly set the cookie of the grant reply to that of + * the blocked lock request. The spec explicitly mentions that the client + * should _not_ rely on the callback containing the same cookie as the + * request, but (as I found out later) that's because some implementations + * do just this. Never mind the standards comittees, they support our + * logging industries. 
+ */ +static inline struct nlm_block * +nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, + struct nlm_lock *lock, u32 cookie) +{ + struct nlm_block *block; + struct nlm_host *host; + struct nlm_rqst *call; + + /* Create host handle for callback */ + host = nlmclnt_lookup_host(&rqstp->rq_addr, + rqstp->rq_prot, rqstp->rq_vers); + if (host == NULL) + return NULL; + + /* Allocate memory for block, and initialize arguments */ + if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL))) + goto failed; + memset(block, 0, sizeof(*block)); + + /* Set notifier function for VFS, and init args */ + lock->fl.fl_notify = nlmsvc_notify_blocked; + if (!nlmclnt_setgrantargs(&block->b_call, lock)) { + kfree(block); + goto failed; + } + block->b_call.a_args.cookie = cookie; /* see above */ + + dprintk("lockd: created block %p...\n", block); + + /* Create and initialize the block */ + block->b_daemon = rqstp->rq_server; + block->b_host = host; + block->b_file = file; + + /* Add to file's list of blocks */ + block->b_fnext = file->f_blocks; + file->f_blocks = block; + + /* Set up RPC arguments for callback */ + call = &block->b_call; + call->a_host = host; + call->a_flags = RPC_TASK_ASYNC; + + return block; + +failed: + nlm_release_host(host); + return NULL; +} + +/* + * Delete a block. If the lock was cancelled or the grant callback + * failed, unlock is set to 1. + * It is the caller's responsibility to check whether the file + * can be closed hereafter. 
+ */ +static void +nlmsvc_delete_block(struct nlm_block *block, int unlock) +{ + struct file_lock *fl = &block->b_call.a_args.lock.fl; + struct nlm_file *file = block->b_file; + struct nlm_block **bp; + + dprintk("lockd: deleting block %p...\n", block); + + /* Remove block from list */ + nlmsvc_remove_block(block); + + /* If granted, unlock it, else remove from inode block list */ + if (unlock && block->b_granted) { + dprintk("lockd: deleting granted lock\n"); + fl->fl_type = F_UNLCK; + posix_lock_file(&block->b_file->f_file, fl, 0); + block->b_granted = 0; + } else { + dprintk("lockd: unblocking blocked lock\n"); + posix_unblock_lock(fl); + } + + /* If the block is in the middle of a GRANT callback, + * don't kill it yet. */ + if (block->b_incall) { + nlmsvc_insert_block(block, NLM_NEVER); + block->b_done = 1; + return; + } + + /* Remove block from file's list of blocks */ + for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { + if (*bp == block) { + *bp = block->b_fnext; + break; + } + } + + if (block->b_host) + nlm_release_host(block->b_host); + nlmclnt_freegrantargs(&block->b_call); + kfree(block); +} + +/* + * Loop over all blocks and perform the action specified. + * (NLM_ACT_CHECK handled by nlmsvc_inspect_file). + */ +int +nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) +{ + struct nlm_block *block, *next; + + down(&file->f_sema); + for (block = file->f_blocks; block; block = next) { + next = block->b_fnext; + if (action == NLM_ACT_MARK) + block->b_host->h_inuse = 1; + else if (action == NLM_ACT_UNLOCK) { + if (host == NULL || host == block->b_host) + nlmsvc_delete_block(block, 1); + } + } + up(&file->f_sema); + return 0; +} + +/* + * Attempt to establish a lock, and if it can't be granted, block it + * if required. 
+ */ +u32 +nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, + struct nlm_lock *lock, int wait, u32 cookie) +{ + struct file_lock *conflock; + struct nlm_block *block; + int error; + + dprintk("lockd: nlmsvc_lock(%04x/%ld, ty=%d, pi=%d, %ld-%ld, bl=%d)\n", + file->f_file.f_inode->i_dev, + file->f_file.f_inode->i_ino, + lock->fl.fl_type, lock->fl.fl_pid, + lock->fl.fl_start, + lock->fl.fl_end, + wait); + + /* Lock file against concurrent access */ + down(&file->f_sema); + + /* Get existing block (in case client is busy-waiting) */ + block = nlmsvc_lookup_block(file, lock, 0); + + lock->fl.fl_flags |= FL_LOCKD; + +again: + if (!(conflock = posix_test_lock(&file->f_file, &lock->fl))) { + error = posix_lock_file(&file->f_file, &lock->fl, 0); + + if (block) + nlmsvc_delete_block(block, 0); + up(&file->f_sema); + + dprintk("lockd: posix_lock_file returned %d\n", -error); + switch(-error) { + case 0: + return nlm_granted; + case EDEADLK: /* no applicable NLM status */ + case EAGAIN: + return nlm_lck_denied; + default: /* includes ENOLCK */ + return nlm_lck_denied_nolocks; + } + } + + if (!wait) { + up(&file->f_sema); + return nlm_lck_denied; + } + + /* If we don't have a block, create and initialize it. Then + * retry because we may have slept in kmalloc. */ + if (block == NULL) { + dprintk("lockd: blocking on this lock (allocating).\n"); + if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie))) + return nlm_lck_denied_nolocks; + goto again; + } + + /* Append to list of blocked */ + nlmsvc_insert_block(block, NLM_NEVER); + + /* Now add block to block list of the conflicting lock */ + dprintk("lockd: blocking on this lock.\n"); + posix_block_lock(conflock, &block->b_call.a_args.lock.fl); + + up(&file->f_sema); + return nlm_lck_blocked; +} + +/* + * Test for presence of a conflicting lock. 
+ */ +u32 +nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, + struct nlm_lock *conflock) +{ + struct file_lock *fl; + + dprintk("lockd: nlmsvc_testlock(%04x/%ld, ty=%d, %ld-%ld)\n", + file->f_file.f_inode->i_dev, + file->f_file.f_inode->i_ino, + lock->fl.fl_type, + lock->fl.fl_start, + lock->fl.fl_end); + + if ((fl = posix_test_lock(&file->f_file, &lock->fl)) != NULL) { + dprintk("lockd: conflicting lock(ty=%d, %ld-%ld)\n", + fl->fl_type, fl->fl_start, fl->fl_end); + conflock->caller = "somehost"; /* FIXME */ + conflock->oh.len = 0; /* don't return OH info */ + conflock->fl = *fl; + return nlm_lck_denied; + } + + return nlm_granted; +} + +/* + * Remove a lock. + * This implies a CANCEL call: We send a GRANT_MSG, the client replies + * with a GRANT_RES call which gets lost, and calls UNLOCK immediately + * afterwards. In this case the block will still be there, and hence + * must be removed. + */ +u32 +nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock) +{ + int error; + + dprintk("lockd: nlmsvc_unlock(%04x/%ld, pi=%d, %ld-%ld)\n", + file->f_file.f_inode->i_dev, + file->f_file.f_inode->i_ino, + lock->fl.fl_pid, + lock->fl.fl_start, + lock->fl.fl_end); + + /* First, cancel any lock that might be there */ + nlmsvc_cancel_blocked(file, lock); + + lock->fl.fl_type = F_UNLCK; + error = posix_lock_file(&file->f_file, &lock->fl, 0); + + return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; +} + +/* + * Cancel a previously blocked request. + * + * A cancel request always overrides any grant that may currently + * be in progress. + * The calling procedure must check whether the file can be closed. 
+ */ +u32 +nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) +{ + struct nlm_block *block; + + dprintk("lockd: nlmsvc_cancel(%04x/%ld, pi=%d, %ld-%ld)\n", + file->f_file.f_inode->i_dev, + file->f_file.f_inode->i_ino, + lock->fl.fl_pid, + lock->fl.fl_start, + lock->fl.fl_end); + + down(&file->f_sema); + if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) + nlmsvc_delete_block(block, 1); + up(&file->f_sema); + return nlm_granted; +} + +/* + * Unblock a blocked lock request. This is a callback invoked from the + * VFS layer when a lock on which we blocked is removed. + * + * This function doesn't grant the blocked lock instantly, but rather moves + * the block to the head of nlm_blocked where it can be picked up by lockd. + */ +static void +nlmsvc_notify_blocked(struct file_lock *fl) +{ + struct nlm_block **bp, *block; + + dprintk("lockd: VFS unblock notification for block %p\n", fl); + posix_unblock_lock(fl); + for (bp = &nlm_blocked; (block = *bp); bp = &block->b_next) { + if (&block->b_call.a_args.lock.fl == fl) { + svc_wake_up(block->b_daemon); + nlmsvc_insert_block(block, 0); + return; + } + } + + printk(KERN_WARNING "lockd: notification for unknown block!\n"); +} + +/* + * Try to claim a lock that was previously blocked. + * + * Note that we use both the RPC_GRANTED_MSG call _and_ an async + * RPC thread when notifying the client. This seems like overkill... + * Here's why: + * - we don't want to use a synchronous RPC thread, otherwise + * we might find ourselves hanging on a dead portmapper. + * - Some lockd implementations (e.g. HP) don't react to + * RPC_GRANTED calls; they seem to insist on RPC_GRANTED_MSG calls. 
+ */ +static void +nlmsvc_grant_blocked(struct nlm_block *block) +{ + struct nlm_file *file = block->b_file; + struct nlm_lock *lock = &block->b_call.a_args.lock; + struct file_lock *conflock; + int error; + + dprintk("lockd: grant blocked lock %p\n", block); + + /* First thing is lock the file */ + down(&file->f_sema); + + /* Unlink block request from list */ + nlmsvc_remove_block(block); + + /* If b_granted is true this means we've been here before. + * Just retry the grant callback, possibly refreshing the RPC + * binding */ + if (block->b_granted) { + nlm_rebind_host(block->b_host); + goto callback; + } + + /* Try the lock operation again */ + if ((conflock = posix_test_lock(&file->f_file, &lock->fl)) != NULL) { + /* Bummer, we blocked again */ + dprintk("lockd: lock still blocked\n"); + nlmsvc_insert_block(block, NLM_NEVER); + posix_block_lock(conflock, &lock->fl); + up(&file->f_sema); + return; + } + + /* Alright, no conflicting lock. Now lock it for real. If the + * following yields an error, this is most probably due to low + * memory. Retry the lock in a few seconds. + */ + if ((error = posix_lock_file(&file->f_file, &lock->fl, 0)) < 0) { + printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", + -error, __FUNCTION__); + nlmsvc_insert_block(block, jiffies + 10 * HZ); + up(&file->f_sema); + return; + } + +callback: + /* Lock was granted by VFS. */ + dprintk("lockd: GRANTing blocked lock.\n"); + block->b_granted = 1; + block->b_incall = 1; + + /* Schedule next grant callback in 30 seconds */ + nlmsvc_insert_block(block, jiffies + 30 * HZ); + + /* Call the client */ + nlmclnt_async_call(&block->b_call, NLMPROC_GRANTED_MSG, + nlmsvc_grant_callback); + up(&file->f_sema); +} + +/* + * This is the callback from the RPC layer when the NLM_GRANTED_MSG + * RPC call has succeeded or timed out. + * Like all RPC callbacks, it is invoked by the rpciod process, so it + * better not sleep. 
Therefore, we put the blocked lock on the nlm_blocked + * chain once more in order to have it removed by lockd itself (which can + * then sleep on the file semaphore without disrupting e.g. the nfs client). + */ +static void +nlmsvc_grant_callback(struct rpc_task *task) +{ + struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + struct nlm_block *block; + unsigned long timeout; + + dprintk("lockd: GRANT_MSG RPC callback\n"); + if (!(block = nlmsvc_find_block(call->a_args.cookie))) { + dprintk("lockd: no block for cookie %x\n", call->a_args.cookie); + return; + } + + /* Technically, we should down the file semaphore here. Since we + * move the block towards the head of the queue only, no harm + * can be done, though. */ + if (task->tk_status < 0) { + /* RPC error: Re-insert for retransmission */ + timeout = jiffies + 10 * HZ; + } else if (block->b_done) { + /* Block already removed, kill it for real */ + timeout = 0; + } else { + /* Call was successful, now wait for client callback */ + timeout = jiffies + 60 * HZ; + } + nlmsvc_insert_block(block, timeout); + svc_wake_up(block->b_daemon); + block->b_incall = 0; + + nlm_release_host(call->a_host); + rpc_release_task(task); +} + +/* + * We received a GRANT_RES callback. Try to find the corresponding + * block. + */ +void +nlmsvc_grant_reply(u32 cookie, u32 status) +{ + struct nlm_block *block; + struct nlm_file *file; + + if (!(block = nlmsvc_find_block(cookie))) + return; + file = block->b_file; + + file->f_count++; + down(&file->f_sema); + if ((block = nlmsvc_find_block(cookie)) != NULL) { + if (status == NLM_LCK_DENIED_GRACE_PERIOD) { + /* Try again in a couple of seconds */ + nlmsvc_insert_block(block, jiffies + 10 * HZ); + block = NULL; + } else { + /* Lock is now held by client, or has been rejected. + * In both cases, the block should be removed. 
*/ + file->f_count++; + up(&file->f_sema); + if (status == NLM_LCK_GRANTED) + nlmsvc_delete_block(block, 0); + else + nlmsvc_delete_block(block, 1); + } + } + if (!block) + up(&file->f_sema); + nlm_release_file(file); +} + +/* + * Retry all blocked locks that have been notified. This is where lockd + * picks up locks that can be granted, or grant notifications that must + * be retransmitted. + */ +unsigned long +nlmsvc_retry_blocked(void) +{ + struct nlm_block *block; + + dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", + nlm_blocked, + nlm_blocked? nlm_blocked->b_when : 0); + while ((block = nlm_blocked) && block->b_when < jiffies) { + dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", + block, block->b_when, block->b_done); + if (block->b_done) + nlmsvc_delete_block(block, 0); + else + nlmsvc_grant_blocked(block); + } + + if ((block = nlm_blocked) && block->b_when != NLM_NEVER) + return block->b_when; + return 0; +} diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c new file mode 100644 index 000000000..eed15bead --- /dev/null +++ b/fs/lockd/svcproc.c @@ -0,0 +1,551 @@ +/* + * linux/fs/lockd/svcproc.c + * + * Lockd server procedures. We don't implement the NLM_*_RES + * procedures because we don't use the async procedures. 
+ * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/malloc.h> +#include <linux/in.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfsd/nfsd.h> +#include <linux/lockd/lockd.h> +#include <linux/lockd/share.h> +#include <linux/lockd/sm_inter.h> + + +#define NLMDBG_FACILITY NLMDBG_CLIENT + +static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *); +static void nlmsvc_callback_exit(struct rpc_task *); + +/* + * Obtain client and file from arguments + */ +static u32 +nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_host **hostp, struct nlm_file **filp) +{ + struct nlm_host *host = NULL; + struct nlm_file *file = NULL; + struct nlm_lock *lock = &argp->lock; + u32 error; + + /* nfsd callbacks must have been installed for this procedure */ + if (!nlmsvc_ops) + return nlm_lck_denied_nolocks; + + /* Obtain handle for client host */ + if (rqstp->rq_client == NULL) { + printk(KERN_NOTICE + "lockd: unauthenticated request from (%08lx:%d)\n", + ntohl(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port)); + return nlm_lck_denied_nolocks; + } + + /* Obtain host handle */ + if (!(host = nlmsvc_lookup_host(rqstp)) + || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) + goto no_locks; + *hostp = host; + + /* Obtain file pointer. Not used by FREE_ALL call. 
*/ + if (filp != NULL) { + if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0) + goto no_locks; + *filp = file; + + /* Set up the missing parts of the file_lock structure */ + lock->fl.fl_file = &file->f_file; + lock->fl.fl_owner = host; + } + + return 0; + +no_locks: + if (host) + nlm_release_host(host); + return nlm_lck_denied_nolocks; +} + +/* + * NULL: Test for presence of service + */ +static int +nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + dprintk("lockd: NULL called\n"); + return rpc_success; +} + +/* + * TEST: Check for conflicting lock + */ +static int +nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + struct nlm_host *host; + struct nlm_file *file; + + dprintk("lockd: TEST called\n"); + resp->cookie = argp->cookie; + + /* Don't accept test requests during grace period */ + if (nlmsvc_grace_period) { + resp->status = nlm_lck_denied_grace_period; + return rpc_success; + } + + /* Obtain client and file */ + if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) + return rpc_success; + + /* Now check for conflicting locks */ + resp->status = nlmsvc_testlock(file, &argp->lock, &resp->lock); + + dprintk("lockd: TEST status %ld\n", ntohl(resp->status)); + nlm_release_host(host); + nlm_release_file(file); + return rpc_success; +} + +static int +nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + struct nlm_host *host; + struct nlm_file *file; + + dprintk("lockd: LOCK called\n"); + + resp->cookie = argp->cookie; + + /* Don't accept new lock requests during grace period */ + if (nlmsvc_grace_period && !argp->reclaim) { + resp->status = nlm_lck_denied_grace_period; + return rpc_success; + } + + /* Obtain client and file */ + if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) + return rpc_success; + +#if 0 + /* If supplied state doesn't match current state, we assume it's + * an old request that time-warped somehow. 
Any error return would + * do in this case because it's irrelevant anyway. + * + * NB: We don't retrieve the remote host's state yet. + */ + if (host->h_nsmstate && host->h_nsmstate != argp->state) { + resp->status = nlm_lck_denied_nolocks; + } else +#endif + + /* Now try to lock the file */ + resp->status = nlmsvc_lock(rqstp, file, &argp->lock, + argp->block, argp->cookie); + + dprintk("lockd: LOCK status %ld\n", ntohl(resp->status)); + nlm_release_host(host); + nlm_release_file(file); + return rpc_success; +} + +static int +nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + struct nlm_host *host; + struct nlm_file *file; + + dprintk("lockd: CANCEL called\n"); + + resp->cookie = argp->cookie; + + /* Don't accept requests during grace period */ + if (nlmsvc_grace_period) { + resp->status = nlm_lck_denied_grace_period; + return rpc_success; + } + + /* Obtain client and file */ + if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) + return rpc_success; + + /* Try to cancel request. 
*/ + resp->status = nlmsvc_cancel_blocked(file, &argp->lock); + + dprintk("lockd: CANCEL status %ld\n", ntohl(resp->status)); + nlm_release_host(host); + nlm_release_file(file); + return rpc_success; +} + +/* + * UNLOCK: release a lock + */ +static int +nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + struct nlm_host *host; + struct nlm_file *file; + + dprintk("lockd: UNLOCK called\n"); + + resp->cookie = argp->cookie; + + /* Don't accept new lock requests during grace period */ + if (nlmsvc_grace_period) { + resp->status = nlm_lck_denied_grace_period; + return rpc_success; + } + + /* Obtain client and file */ + if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) + return rpc_success; + + /* Now try to remove the lock */ + resp->status = nlmsvc_unlock(file, &argp->lock); + + dprintk("lockd: UNLOCK status %ld\n", ntohl(resp->status)); + nlm_release_host(host); + nlm_release_file(file); + return rpc_success; +} + +/* + * GRANTED: A server calls us to tell that a process' lock request + * was granted + */ +static int +nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + resp->cookie = argp->cookie; + + dprintk("lockd: GRANTED called\n"); + resp->status = nlmclnt_grant(&argp->lock); + dprintk("lockd: GRANTED status %ld\n", ntohl(resp->status)); + return rpc_success; +} + +/* + * `Async' versions of the above service routines. They aren't really, + * because we send the callback before the reply proper. I hope this + * doesn't break any clients. 
+ */ +static int +nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, + void *resp) +{ + struct nlm_res res; + u32 stat; + + dprintk("lockd: TEST_MSG called\n"); + + if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0) + stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res); + return stat; +} + +static int +nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, + void *resp) +{ + struct nlm_res res; + u32 stat; + + dprintk("lockd: LOCK_MSG called\n"); + + if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0) + stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res); + return stat; +} + +static int +nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, + void *resp) +{ + struct nlm_res res; + u32 stat; + + dprintk("lockd: CANCEL_MSG called\n"); + + if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0) + stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res); + return stat; +} + +static int +nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, + void *resp) +{ + struct nlm_res res; + u32 stat; + + dprintk("lockd: UNLOCK_MSG called\n"); + + if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0) + stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res); + return stat; +} + +static int +nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, + void *resp) +{ + struct nlm_res res; + u32 stat; + + dprintk("lockd: GRANTED_MSG called\n"); + + if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0) + stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res); + return stat; +} + +/* + * SHARE: create a DOS share or alter existing share. 
+ */ +static int +nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + struct nlm_host *host; + struct nlm_file *file; + + dprintk("lockd: SHARE called\n"); + + resp->cookie = argp->cookie; + + /* Don't accept new lock requests during grace period */ + if (nlmsvc_grace_period && !argp->reclaim) { + resp->status = nlm_lck_denied_grace_period; + return rpc_success; + } + + /* Obtain client and file */ + if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) + return rpc_success; + + /* Now try to create the share */ + resp->status = nlmsvc_share_file(host, file, argp); + + dprintk("lockd: SHARE status %ld\n", ntohl(resp->status)); + nlm_release_host(host); + nlm_release_file(file); + return rpc_success; +} + +/* + * UNSHARE: Release a DOS share. + */ +static int +nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + struct nlm_host *host; + struct nlm_file *file; + + dprintk("lockd: UNSHARE called\n"); + + resp->cookie = argp->cookie; + + /* Don't accept requests during grace period */ + if (nlmsvc_grace_period) { + resp->status = nlm_lck_denied_grace_period; + return rpc_success; + } + + /* Obtain client and file */ + if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) + return rpc_success; + + /* Now try to lock the file */ + resp->status = nlmsvc_unshare_file(host, file, argp); + + dprintk("lockd: UNSHARE status %ld\n", ntohl(resp->status)); + nlm_release_host(host); + nlm_release_file(file); + return rpc_success; +} + +/* + * NM_LOCK: Create an unmonitored lock + */ +static int +nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp, + struct nlm_res *resp) +{ + dprintk("lockd: NM_LOCK called\n"); + + argp->monitor = 0; /* just clean the monitor flag */ + return nlmsvc_proc_lock(rqstp, argp, resp); +} + +/* + * FREE_ALL: Release all locks and shares held by client + */ +static int +nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct 
nlm_args *argp, + void *resp) +{ + struct nlm_host *host; + + /* Obtain client */ + if (nlmsvc_retrieve_args(rqstp, argp, &host, NULL)) + return rpc_success; + + nlmsvc_free_host_resources(host); + nlm_release_host(host); + return rpc_success; +} + +/* + * SM_NOTIFY: private callback from statd (not part of official NLM proto) + */ +static int +nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, + void *resp) +{ + struct sockaddr_in saddr = rqstp->rq_addr; + struct nlm_host *host; + + dprintk("lockd: SM_NOTIFY called\n"); + if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) + || ntohs(saddr.sin_port) >= 1024) { + printk(KERN_WARNING + "lockd: rejected NSM callback from %08lx:%d\n", + ntohl(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port)); + return rpc_system_err; + } + + /* Obtain the host pointer for this NFS server and try to + * reclaim all locks we hold on this server. + */ + saddr.sin_addr.s_addr = argp->addr; + if ((host = nlm_lookup_host(NULL, &saddr, IPPROTO_UDP, 1)) != NULL) { + nlmclnt_recovery(host, argp->state); + nlm_release_host(host); + } + + /* If we run on an NFS server, delete all locks held by the client */ + if (nlmsvc_ops != NULL) { + struct svc_client *clnt; + saddr.sin_addr.s_addr = argp->addr; + if ((clnt = nlmsvc_ops->exp_getclient(&saddr)) != NULL + && (host = nlm_lookup_host(clnt, &saddr, 0, 0)) != NULL) { + nlmsvc_free_host_resources(host); + } + nlm_release_host(host); + } + + return rpc_success; +} + +/* + * This is the generic lockd callback for async RPC calls + */ +static u32 +nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) +{ + struct nlm_host *host; + struct nlm_rqst *call; + + if (!(call = nlmclnt_alloc_call())) + return rpc_system_err; + + host = nlmclnt_lookup_host(&rqstp->rq_addr, + rqstp->rq_prot, rqstp->rq_vers); + if (!host) { + kfree(call); + return rpc_system_err; + } + + call->a_flags = RPC_TASK_ASYNC; + call->a_host = host; + memcpy(&call->a_args, resp, 
sizeof(*resp)); + + if (nlmclnt_async_call(call, proc, nlmsvc_callback_exit) < 0) + return rpc_system_err; + + return rpc_success; +} + +static void +nlmsvc_callback_exit(struct rpc_task *task) +{ + struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + + if (task->tk_status < 0) { + dprintk("lockd: %4d callback failed (errno = %d)\n", + task->tk_pid, -task->tk_status); + } + nlm_release_host(call->a_host); + rpc_release_task(task); + kfree(call); +} + +/* + * NLM Server procedures. + */ +#define nlmsvc_proc_none NULL +#define nlmsvc_encode_norep NULL +#define nlmsvc_decode_norep NULL +#define nlmsvc_decode_testres NULL +#define nlmsvc_proc_test_res NULL +#define nlmsvc_proc_lock_res NULL +#define nlmsvc_proc_cancel_res NULL +#define nlmsvc_proc_unlock_res NULL +#define nlmsvc_proc_granted_res NULL +struct nlm_void { int dummy; }; + +#define PROC(name, xargt, xrest, argt, rest) \ + { (svc_procfunc) nlmsvc_proc_##name, \ + (kxdrproc_t) nlmsvc_decode_##xargt, \ + (kxdrproc_t) nlmsvc_encode_##xrest, \ + NULL, \ + sizeof(struct nlm_##argt), \ + sizeof(struct nlm_##rest), \ + 0, \ + 0 \ + } +struct svc_procedure nlmsvc_procedures[] = { + PROC(null, void, void, void, void), + PROC(test, testargs, testres, args, res), + PROC(lock, lockargs, res, args, res), + PROC(cancel, cancargs, res, args, res), + PROC(unlock, unlockargs, res, args, res), + PROC(granted, testargs, res, args, res), + PROC(test_msg, testargs, norep, args, void), + PROC(lock_msg, lockargs, norep, args, void), + PROC(cancel_msg, cancargs, norep, args, void), + PROC(unlock_msg, unlockargs, norep, args, void), + PROC(granted_msg, testargs, norep, args, void), + PROC(test_res, testres, norep, res, void), + PROC(lock_res, res, norep, res, void), + PROC(cancel_res, res, norep, res, void), + PROC(unlock_res, res, norep, res, void), + PROC(granted_res, res, norep, res, void), + PROC(none, void, void, void, void), + PROC(none, void, void, void, void), + PROC(none, void, void, void, void), + PROC(none, 
void, void, void, void), + PROC(share, shareargs, shareres, args, res), + PROC(unshare, shareargs, shareres, args, res), + PROC(nm_lock, lockargs, res, args, res), + PROC(free_all, notify, void, args, void), + + /* statd callback */ + PROC(sm_notify, reboot, void, reboot, void), +}; diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c new file mode 100644 index 000000000..a8e7af942 --- /dev/null +++ b/fs/lockd/svcshare.c @@ -0,0 +1,111 @@ +/* + * linux/fs/lockd/svcshare.c + * + * Management of DOS shares. + * + * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/sched.h> +#include <linux/unistd.h> +#include <linux/malloc.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/lockd/lockd.h> +#include <linux/lockd/share.h> + +static inline int +nlm_cmp_owner(struct nlm_share *share, struct xdr_netobj *oh) +{ + return share->s_owner.len == oh->len + && !memcmp(share->s_owner.data, oh->data, oh->len); +} + +u32 +nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file, + struct nlm_args *argp) +{ + struct nlm_share *share; + struct xdr_netobj *oh = &argp->lock.oh; + u8 *ohdata; + + for (share = file->f_shares; share; share = share->s_next) { + if (share->s_host == host && nlm_cmp_owner(share, oh)) + goto update; + if ((argp->fsm_access & share->s_mode) + || (argp->fsm_mode & share->s_access )) + return nlm_lck_denied; + } + + share = (struct nlm_share *) kmalloc(sizeof(*share) + oh->len, + GFP_KERNEL); + if (share == NULL) + return nlm_lck_denied_nolocks; + + /* Copy owner handle */ + ohdata = (u8 *) (share + 1); + memcpy(ohdata, oh->data, oh->len); + + share->s_file = file; + share->s_host = host; + share->s_owner.data = ohdata; + share->s_owner.len = oh->len; + share->s_next = file->f_shares; + file->f_shares = share; + file->f_count += 1; + +update: + share->s_access = argp->fsm_access; + share->s_mode = argp->fsm_mode; + return nlm_granted; +} + +/* + * Delete a share. 
+ */ +u32 +nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, + struct nlm_args *argp) +{ + struct nlm_share *share, **shpp; + struct xdr_netobj *oh = &argp->lock.oh; + + for (shpp = &file->f_shares; (share = *shpp); shpp = &share->s_next) { + if (share->s_host == host && nlm_cmp_owner(share, oh)) { + *shpp = share->s_next; + kfree(share); + return nlm_granted; + } + } + + /* X/Open spec says return success even if there was no + * corresponding share. */ + return nlm_granted; +} + +/* + * Traverse all shares for a given file (and host). + * NLM_ACT_CHECK is handled by nlmsvc_inspect_file. + */ +int +nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) +{ + struct nlm_share *share, **shpp; + + shpp = &file->f_shares; + while ((share = *shpp) != NULL) { + if (action == NLM_ACT_MARK) + share->s_host->h_inuse = 1; + else if (action == NLM_ACT_UNLOCK) { + if (host == NULL || host == share->s_host) { + *shpp = share->s_next; + kfree(share); + continue; + } + } + shpp = &share->s_next; + } + + return 0; +} diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c new file mode 100644 index 000000000..24093a615 --- /dev/null +++ b/fs/lockd/svcsubs.c @@ -0,0 +1,278 @@ +/* + * linux/fs/lockd/svcsubs.c + * + * Various support routines for the NLM server. + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/in.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfsd/nfsfh.h> +#include <linux/nfsd/export.h> +#include <linux/lockd/lockd.h> +#include <linux/lockd/share.h> +#include <linux/lockd/sm_inter.h> + +#define NLMDBG_FACILITY NLMDBG_SVCSUBS + + +/* + * Global file hash table + */ +#define FILE_NRHASH 32 +#define FILE_HASH(dev, ino) (((dev) + (ino)) & FILE_NRHASH) +static struct nlm_file * nlm_files[FILE_NRHASH]; +static struct semaphore nlm_file_sema = MUTEX; + +/* + * Lookup file info. 
If it doesn't exist, create a file info struct + * and open a (VFS) file for the given inode. + * + * FIXME: + * Note that we open the file O_RDONLY even when creating write locks. + * This is not quite right, but for now, we assume the client performs + * the proper R/W checking. + */ +u32 +nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, + struct nfs_fh *f) +{ + struct nlm_file *file; + struct knfs_fh *fh = (struct knfs_fh *) f; + unsigned int hash = FILE_HASH(fh->fh_dev, fh->fh_ino); + u32 nfserr; + + dprintk("lockd: nlm_file_lookup(%04x/%ld)\n", fh->fh_dev, fh->fh_ino); + + /* Lock file table */ + down(&nlm_file_sema); + + for (file = nlm_files[hash]; file; file = file->f_next) { + if (file->f_handle.fh_ino == fh->fh_ino + && !memcmp(&file->f_handle, fh, sizeof(*fh))) + goto found; + } + + dprintk("lockd: creating file for %04x/%ld\n", fh->fh_dev, fh->fh_ino); + if (!(file = (struct nlm_file *) kmalloc(sizeof(*file), GFP_KERNEL))) { + up(&nlm_file_sema); + return nlm_lck_denied_nolocks; + } + + memset(file, 0, sizeof(*file)); + file->f_handle = *fh; + file->f_sema = MUTEX; + + /* Open the file. Note that this must not sleep for too long, else + * we would lock up lockd:-) So no NFS re-exports, folks. 
*/ + if ((nfserr = nlmsvc_ops->fopen(rqstp, fh, &file->f_file)) != 0) { + dprintk("lockd: open failed (nfserr %ld)\n", ntohl(nfserr)); + kfree(file); + up(&nlm_file_sema); + return nlm_lck_denied; + } + + file->f_next = nlm_files[hash]; + nlm_files[hash] = file; + +found: + dprintk("lockd: found file %p (count %d)\n", file, file->f_count); + *result = file; + up(&nlm_file_sema); + file->f_count++; + return 0; +} + +/* + * Delete a file after having released all locks, blocks and shares + */ +static inline void +nlm_delete_file(struct nlm_file *file) +{ + struct inode *inode = nlmsvc_file_inode(file); + struct nlm_file **fp, *f; + + dprintk("lockd: closing file %04x/%ld\n", inode->i_dev, inode->i_ino); + fp = nlm_files + FILE_HASH(inode->i_dev, inode->i_ino); + while ((f = *fp) != NULL) { + if (f == file) { + *fp = file->f_next; + nlmsvc_ops->fclose(&file->f_file); + kfree(file); + return; + } + fp = &file->f_next; + } + + printk(KERN_WARNING "lockd: attempt to release unknown file!\n"); +} + +/* + * Loop over all locks on the given file and perform the specified + * action. 
+ */ +static int +nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action) +{ + struct inode *inode = nlmsvc_file_inode(file); + struct file_lock *fl; + struct nlm_host *lockhost; + +again: + file->f_locks = 0; + for (fl = inode->i_flock; fl; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_LOCKD)) + continue; + + /* update current lock count */ + file->f_locks++; + lockhost = (struct nlm_host *) fl->fl_owner; + if (action == NLM_ACT_MARK) + lockhost->h_inuse = 1; + else if (action == NLM_ACT_CHECK) + return 1; + else if (action == NLM_ACT_UNLOCK) { + struct file_lock lock = *fl; + + if (host && lockhost != host) + continue; + + lock.fl_type = F_UNLCK; + lock.fl_start = 0; + lock.fl_end = NLM_OFFSET_MAX; + if (posix_lock_file(&file->f_file, &lock, 0) < 0) { + printk("lockd: unlock failure in %s:%d\n", + __FILE__, __LINE__); + return 1; + } + goto again; + } + } + + return 0; +} + +/* + * Operate on a single file + */ +static inline int +nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action) +{ + if (action == NLM_ACT_CHECK) { + /* Fast path for mark and sweep garbage collection */ + if (file->f_count || file->f_blocks || file->f_shares) + return 1; + } else { + if (nlmsvc_traverse_blocks(host, file, action) + || nlmsvc_traverse_shares(host, file, action)) + return 1; + } + return nlm_traverse_locks(host, file, action); +} + +/* + * Loop over all files in the file table. + */ +static int +nlm_traverse_files(struct nlm_host *host, int action) +{ + struct nlm_file *file, **fp; + int i; + + down(&nlm_file_sema); + for (i = 0; i < FILE_NRHASH; i++) { + fp = nlm_files + i; + while ((file = *fp) != NULL) { + /* Traverse locks, blocks and shares of this file + * and update file->f_locks count */ + if (nlm_inspect_file(host, file, action)) { + up(&nlm_file_sema); + return 1; + } + + /* No more references to this file. Let go of it. 
*/ + if (!file->f_blocks && !file->f_locks + && !file->f_shares && !file->f_count) { + *fp = file->f_next; + nlmsvc_ops->fclose(&file->f_file); + kfree(file); + } else { + fp = &file->f_next; + } + } + } + up(&nlm_file_sema); + return 0; +} + +/* + * Release file. If there are no more remote locks on this file, + * close it and free the handle. + * + * Note that we can't do proper reference counting without major + * contortions because the code in fs/locks.c creates, deletes and + * splits locks without notification. Our only way is to walk the + * entire lock list each time we remove a lock. + */ +void +nlm_release_file(struct nlm_file *file) +{ + dprintk("lockd: nlm_release_file(%p, ct = %d)\n", + file, file->f_count); + + /* Lock file table */ + down(&nlm_file_sema); + + /* If there are no more locks etc, delete the file */ + if (--(file->f_count) == 0 + && !nlm_inspect_file(NULL, file, NLM_ACT_CHECK)) + nlm_delete_file(file); + + up(&nlm_file_sema); + return; +} + +/* + * Mark all hosts that still hold resources + */ +void +nlmsvc_mark_resources(void) +{ + dprintk("lockd: nlmsvc_mark_resources\n"); + + nlm_traverse_files(NULL, NLM_ACT_MARK); +} + +/* + * Release all resources held by the given client + */ +void +nlmsvc_free_host_resources(struct nlm_host *host) +{ + dprintk("lockd: nlmsvc_free_host_resources\n"); + + if (nlm_traverse_files(host, NLM_ACT_UNLOCK)) + printk(KERN_WARNING + "lockd: couldn't remove all locks held by %s", + host->h_name); +} + +/* + * Delete a client when the nfsd entry is removed. 
+ */ +void +nlmsvc_invalidate_client(struct svc_client *clnt) +{ + struct nlm_host *host; + + if ((host = nlm_lookup_host(clnt, NULL, 0, 0)) != NULL) { + dprintk("lockd: invalidating client for %s\n", host->h_name); + nlmsvc_free_host_resources(host); + host->h_expires = 0; + nlm_release_host(host); + } +} diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c new file mode 100644 index 000000000..9c23733ac --- /dev/null +++ b/fs/lockd/xdr.c @@ -0,0 +1,605 @@ +/* + * linux/fs/lockd/xdr.c + * + * XDR support for lockd and the lock client. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/utsname.h> +#include <linux/nfs.h> + +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/stats.h> +#include <linux/lockd/lockd.h> +#include <linux/lockd/sm_inter.h> + +#define NLMDBG_FACILITY NLMDBG_XDR +#define NLM_MAXSTRLEN 1024 + +#define QUADLEN(len) (((len) + 3) >> 2) + + +u32 nlm_granted, nlm_lck_denied, nlm_lck_denied_nolocks, + nlm_lck_blocked, nlm_lck_denied_grace_period; + + +typedef struct nlm_args nlm_args; + +/* + * Initialization of NFS status variables + */ +void +nlmxdr_init(void) +{ + static int inited = 0; + + if (inited) + return; + + nlm_granted = htonl(NLM_LCK_GRANTED); + nlm_lck_denied = htonl(NLM_LCK_DENIED); + nlm_lck_denied_nolocks = htonl(NLM_LCK_DENIED_NOLOCKS); + nlm_lck_blocked = htonl(NLM_LCK_BLOCKED); + nlm_lck_denied_grace_period = htonl(NLM_LCK_DENIED_GRACE_PERIOD); + + inited = 1; +} + +/* + * XDR functions for basic NLM types + */ +static inline u32 * +nlm_decode_cookie(u32 *p, u32 *c) +{ + unsigned int len; + + if ((len = ntohl(*p++)) == 4) { + *c = ntohl(*p++); + } else if (len == 0) { /* hockeypux brain damage */ + *c = 0; + } else { + printk(KERN_NOTICE + "lockd: bad cookie size %d (should be 4)\n", len); + return NULL; + } + return p; +} + +static inline u32 * +nlm_encode_cookie(u32 *p, u32 c) +{ + 
*p++ = htonl(sizeof(c)); + *p++ = htonl(c); + return p; +} + +static inline u32 * +nlm_decode_fh(u32 *p, struct nfs_fh *f) +{ + unsigned int len; + + if ((len = ntohl(*p++)) != sizeof(*f)) { + printk(KERN_NOTICE + "lockd: bad fhandle size %x (should be %d)\n", + len, sizeof(*f)); + return NULL; + } + memcpy(f, p, sizeof(*f)); + return p + XDR_QUADLEN(sizeof(*f)); +} + +static inline u32 * +nlm_encode_fh(u32 *p, struct nfs_fh *f) +{ + *p++ = htonl(sizeof(*f)); + memcpy(p, f, sizeof(*f)); + return p + XDR_QUADLEN(sizeof(*f)); +} + +/* + * Encode and decode owner handle + */ +static inline u32 * +nlm_decode_oh(u32 *p, struct xdr_netobj *oh) +{ + return xdr_decode_netobj(p, oh); +} + +static inline u32 * +nlm_encode_oh(u32 *p, struct xdr_netobj *oh) +{ + return xdr_encode_netobj(p, oh); +} + +static inline u32 * +nlm_decode_lock(u32 *p, struct nlm_lock *lock) +{ + struct file_lock *fl = &lock->fl; + int len; + + if (!(p = xdr_decode_string(p, &lock->caller, &len, NLM_MAXSTRLEN)) + || !(p = nlm_decode_fh(p, &lock->fh)) + || !(p = nlm_decode_oh(p, &lock->oh))) + return NULL; + + memset(fl, 0, sizeof(*fl)); + fl->fl_owner = current; + fl->fl_pid = ntohl(*p++); + fl->fl_flags = FL_POSIX; + fl->fl_type = F_RDLCK; /* as good as anything else */ + fl->fl_start = ntohl(*p++); + len = ntohl(*p++); + if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0) + fl->fl_end = NLM_OFFSET_MAX; + return p; +} + +/* + * Encode a lock as part of an NLM call + */ +static u32 * +nlm_encode_lock(u32 *p, struct nlm_lock *lock) +{ + struct file_lock *fl = &lock->fl; + + if (!(p = xdr_encode_string(p, lock->caller)) + || !(p = nlm_encode_fh(p, &lock->fh)) + || !(p = nlm_encode_oh(p, &lock->oh))) + return NULL; + + *p++ = htonl(fl->fl_pid); + *p++ = htonl(lock->fl.fl_start); + if (lock->fl.fl_end == NLM_OFFSET_MAX) + *p++ = xdr_zero; + else + *p++ = htonl(lock->fl.fl_end - lock->fl.fl_start + 1); + + return p; +} + +/* + * Encode result of a TEST/TEST_MSG call + */ +static u32 * 
+nlm_encode_testres(u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_encode_cookie(p, resp->cookie))) + return 0; + *p++ = resp->status; + + if (resp->status == nlm_lck_denied) { + struct file_lock *fl = &resp->lock.fl; + + *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; + *p++ = htonl(fl->fl_pid); + + /* Encode owner handle. */ + if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) + return 0; + + *p++ = htonl(fl->fl_start); + if (fl->fl_end == NLM_OFFSET_MAX) + *p++ = xdr_zero; + else + *p++ = htonl(fl->fl_end - fl->fl_start + 1); + } + + return p; +} + +/* + * Check buffer bounds after decoding arguments + */ +static inline int +xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) +{ + struct svc_buf *buf = &rqstp->rq_argbuf; + + return p - buf->base <= buf->buflen; +} + +static inline int +xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) +{ + struct svc_buf *buf = &rqstp->rq_resbuf; + + buf->len = p - buf->base; + return (buf->len <= buf->buflen); +} + +/* + * First, the server side XDR functions + */ +int +nlmsvc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +{ + u32 exclusive; + + if (!(p = nlm_decode_cookie(p, &argp->cookie))) + return 0; + + exclusive = ntohl(*p++); + if (!(p = nlm_decode_lock(p, &argp->lock))) + return 0; + if (exclusive) + argp->lock.fl.fl_type = F_WRLCK; + + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_encode_testres(p, resp))) + return 0; + return xdr_ressize_check(rqstp, p); +} + +int +nlmsvc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +{ + u32 exclusive; + + if (!(p = nlm_decode_cookie(p, &argp->cookie))) + return 0; + argp->block = ntohl(*p++); + exclusive = ntohl(*p++); + if (!(p = nlm_decode_lock(p, &argp->lock))) + return 0; + if (exclusive) + argp->lock.fl.fl_type = F_WRLCK; + argp->reclaim = ntohl(*p++); + argp->state = ntohl(*p++); + argp->monitor = 1; /* monitor client by default */ + + return 
xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +{ + u32 exclusive; + + if (!(p = nlm_decode_cookie(p, &argp->cookie))) + return 0; + argp->block = ntohl(*p++); + exclusive = ntohl(*p++); + if (!(p = nlm_decode_lock(p, &argp->lock))) + return 0; + if (exclusive) + argp->lock.fl.fl_type = F_WRLCK; + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +{ + if (!(p = nlm_decode_cookie(p, &argp->cookie)) + || !(p = nlm_decode_lock(p, &argp->lock))) + return 0; + argp->lock.fl.fl_type = F_UNLCK; + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +{ + struct nlm_lock *lock = &argp->lock; + int len; + + memset(lock, 0, sizeof(*lock)); + lock->fl.fl_pid = ~(u32) 0; + + if (!(p = nlm_decode_cookie(p, &argp->cookie)) + || !(p = xdr_decode_string(p, &lock->caller, &len, NLM_MAXSTRLEN)) + || !(p = nlm_decode_fh(p, &lock->fh)) + || !(p = nlm_decode_oh(p, &lock->oh))) + return 0; + argp->fsm_mode = ntohl(*p++); + argp->fsm_access = ntohl(*p++); + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_encode_cookie(p, resp->cookie))) + return 0; + *p++ = resp->status; + *p++ = xdr_zero; /* sequence argument */ + return xdr_ressize_check(rqstp, p); +} + +int +nlmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_encode_cookie(p, resp->cookie))) + return 0; + *p++ = resp->status; + return xdr_ressize_check(rqstp, p); +} + +int +nlmsvc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp) +{ + struct nlm_lock *lock = &argp->lock; + int len; + + if (!(p = xdr_decode_string(p, &lock->caller, &len, NLM_MAXSTRLEN))) + return 0; + argp->state = ntohl(*p++); + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_decode_reboot(struct svc_rqst *rqstp, u32 
*p, struct nlm_reboot *argp) +{ + if (!(p = xdr_decode_string(p, &argp->mon, &argp->len, SM_MAXSTRLEN))) + return 0; + argp->state = ntohl(*p++); + argp->addr = ntohl(*p++); + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_decode_cookie(p, &resp->cookie))) + return 0; + resp->status = ntohl(*p++); + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_argsize_check(rqstp, p); +} + +int +nlmsvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +/* + * Now, the client side XDR functions + */ +static int +nlmclt_encode_void(struct rpc_rqst *req, u32 *p, void *ptr) +{ + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr) +{ + return 0; +} + +static int +nlmclt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +{ + struct nlm_lock *lock = &argp->lock; + + if (!(p = nlm_encode_cookie(p, argp->cookie))) + return -EIO; + *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero; + if (!(p = nlm_encode_lock(p, lock))) + return -EIO; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_decode_cookie(p, &resp->cookie))) + return -EIO; + resp->status = ntohl(*p++); + if (resp->status == NLM_LCK_DENIED) { + struct file_lock *fl = &resp->lock.fl; + u32 excl, len; + + memset(&resp->lock, 0, sizeof(resp->lock)); + excl = ntohl(*p++); + fl->fl_pid = ntohl(*p++); + if (!(p = nlm_decode_oh(p, &resp->lock.oh))) + return -EIO; + + fl->fl_flags = FL_POSIX; + fl->fl_type = excl? 
F_WRLCK : F_RDLCK; + fl->fl_start = ntohl(*p++); + len = ntohl(*p++); + if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0) + fl->fl_end = NLM_OFFSET_MAX; + } + return 0; +} + + +static int +nlmclt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +{ + struct nlm_lock *lock = &argp->lock; + + if (!(p = nlm_encode_cookie(p, argp->cookie))) + return -EIO; + *p++ = argp->block? xdr_one : xdr_zero; + *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero; + if (!(p = nlm_encode_lock(p, lock))) + return -EIO; + *p++ = argp->reclaim? xdr_one : xdr_zero; + *p++ = htonl(argp->state); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +nlmclt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +{ + struct nlm_lock *lock = &argp->lock; + + if (!(p = nlm_encode_cookie(p, argp->cookie))) + return -EIO; + *p++ = argp->block? xdr_one : xdr_zero; + *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero; + if (!(p = nlm_encode_lock(p, lock))) + return -EIO; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +nlmclt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +{ + struct nlm_lock *lock = &argp->lock; + + if (!(p = nlm_encode_cookie(p, argp->cookie))) + return -EIO; + if (!(p = nlm_encode_lock(p, lock))) + return -EIO; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +nlmclt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_encode_cookie(p, resp->cookie))) + return -EIO; + *p++ = resp->status; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +nlmclt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_encode_testres(p, resp))) + return -EIO; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +{ + if (!(p = nlm_decode_cookie(p, &resp->cookie))) + 
return -EIO; + resp->status = ntohl(*p++); + return 0; +} + +/* + * Buffer requirements for NLM + */ +#define NLM_void_sz 0 +#define NLM_cookie_sz 2 +#define NLM_caller_sz 1+QUADLEN(sizeof(system_utsname.nodename)) +#define NLM_netobj_sz 1+QUADLEN(XDR_MAX_NETOBJ) +/* #define NLM_owner_sz 1+QUADLEN(NLM_MAXOWNER) */ +#define NLM_fhandle_sz 1+QUADLEN(NFS_FHSIZE) +#define NLM_lock_sz 3+NLM_caller_sz+NLM_netobj_sz+NLM_fhandle_sz +#define NLM_holder_sz 4+NLM_netobj_sz + +#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz +#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz +#define NLM_cancargs_sz NLM_cookie_sz+2+NLM_lock_sz +#define NLM_unlockargs_sz NLM_cookie_sz+NLM_lock_sz + +#define NLM_testres_sz NLM_cookie_sz+1+NLM_holder_sz +#define NLM_res_sz NLM_cookie_sz+1 +#define NLM_norep_sz 0 + +#ifndef MAX +# define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif + +/* + * For NLM, a void procedure really returns nothing + */ +#define nlmclt_decode_norep NULL + +#define PROC(proc, argtype, restype) \ + { "nlm_" #proc, \ + (kxdrproc_t) nlmclt_encode_##argtype, \ + (kxdrproc_t) nlmclt_decode_##restype, \ + MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \ + } + +static struct rpc_procinfo nlm_procedures[] = { + PROC(null, void, void), + PROC(test, testargs, testres), + PROC(lock, lockargs, res), + PROC(canc, cancargs, res), + PROC(unlock, unlockargs, res), + PROC(granted, testargs, res), + PROC(test_msg, testargs, norep), + PROC(lock_msg, lockargs, norep), + PROC(canc_msg, cancargs, norep), + PROC(unlock_msg, unlockargs, norep), + PROC(granted_msg, testargs, norep), + PROC(test_res, testres, norep), + PROC(lock_res, res, norep), + PROC(canc_res, res, norep), + PROC(unlock_res, res, norep), + PROC(granted_res, res, norep), + PROC(undef, void, void), + PROC(undef, void, void), + PROC(undef, void, void), + PROC(undef, void, void), +#ifdef NLMCLNT_SUPPORT_SHARES + PROC(share, shareargs, shareres), + PROC(unshare, shareargs, shareres), + PROC(nm_lock, lockargs, res), + 
PROC(free_all, notify, void), +#else + PROC(undef, void, void), + PROC(undef, void, void), + PROC(undef, void, void), + PROC(undef, void, void), +#endif +}; + +static struct rpc_version nlm_version1 = { + 1, 16, nlm_procedures, +}; + +static struct rpc_version nlm_version3 = { + 3, 24, nlm_procedures, +}; + +static struct rpc_version * nlm_versions[] = { + NULL, + &nlm_version1, + NULL, + &nlm_version3, +}; + +static struct rpc_stat nlm_stats; + +struct rpc_program nlm_program = { + "lockd", + NLM_PROGRAM, + sizeof(nlm_versions) / sizeof(nlm_versions[0]), + nlm_versions, + &nlm_stats, +}; + +#ifdef LOCKD_DEBUG +char * +nlm_procname(u32 proc) +{ + if (proc < sizeof(nlm_procedures)/sizeof(nlm_procedures[0])) + return nlm_procedures[proc].p_procname; + return "unknown"; +} +#endif + diff --git a/fs/locks.c b/fs/locks.c index 50f3709c7..3aa530897 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -97,6 +97,9 @@ * Made mandatory locking a mount option. Default is not to allow mandatory * locking. * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996. + * + * Some adaptations for NFS support. 
+ * Olaf Kirch (okir@monad.swb.de), Dec 1996, */ #include <linux/malloc.h> @@ -105,6 +108,8 @@ #include <linux/errno.h> #include <linux/stat.h> #include <linux/fcntl.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -121,12 +126,8 @@ static int posix_locks_conflict(struct file_lock *caller_fl, static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl); static int flock_lock_file(struct file *filp, struct file_lock *caller, unsigned int wait); -static int posix_lock_file(struct file *filp, struct file_lock *caller, - unsigned int wait); -static int posix_locks_deadlock(struct task_struct *my_task, - struct task_struct *blocked_task); -static void posix_remove_locks(struct file_lock **before, struct task_struct *task); -static void flock_remove_locks(struct file_lock **before, struct file *filp); +static int posix_locks_deadlock(struct file_lock *caller, + struct file_lock *blocker); static struct file_lock *locks_alloc_lock(struct file_lock *fl); static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl); @@ -137,7 +138,7 @@ static void locks_insert_block(struct file_lock *blocker, struct file_lock *wait static void locks_delete_block(struct file_lock *blocker, struct file_lock *waiter); static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait); -static struct file_lock *file_lock_table = NULL; +struct file_lock *file_lock_table = NULL; /* Free lock not inserted in any queue. */ @@ -161,6 +162,15 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) (fl2->fl_end >= fl1->fl_start)); } +/* Check whether two locks have the same owner + */ +static inline int +locks_same_owner(struct file_lock *fl1, struct file_lock *fl2) +{ + return (fl1->fl_owner == fl2->fl_owner) && + (fl1->fl_pid == fl2->fl_pid); +} + /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. 
The documentation doesn't require this but @@ -211,6 +221,21 @@ static void locks_delete_block(struct file_lock *blocker, return; } +/* The following two are for the benefit of lockd. + */ +void +posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) +{ + locks_insert_block(blocker, waiter); +} + +void +posix_unblock_lock(struct file_lock *waiter) +{ + if (waiter->fl_prevblock) + locks_delete_block(waiter->fl_prevblock, waiter); +} + /* Wake up processes blocked waiting for blocker. * If told to wait then schedule the processes until the block list * is empty, otherwise empty the block list ourselves. @@ -220,6 +245,8 @@ static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait) struct file_lock *waiter; while ((waiter = blocker->fl_nextblock) != NULL) { + if (waiter->fl_notify) + waiter->fl_notify(waiter); wake_up(&waiter->fl_wait); if (wait) /* Let the blocked process remove waiter from the @@ -242,17 +269,20 @@ asmlinkage int sys_flock(unsigned int fd, unsigned int cmd) { struct file_lock file_lock; struct file *filp; + int err; + lock_kernel(); if ((fd >= NR_OPEN) || !(filp = current->files->fd[fd])) - return (-EBADF); - - if (!flock_make_lock(filp, &file_lock, cmd)) - return (-EINVAL); - - if ((file_lock.fl_type != F_UNLCK) && !(filp->f_mode & 3)) - return (-EBADF); - - return (flock_lock_file(filp, &file_lock, (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1)); + err = -EBADF; + else if (!flock_make_lock(filp, &file_lock, cmd)) + err = -EINVAL; + else if ((file_lock.fl_type != F_UNLCK) && !(filp->f_mode & 3)) + err = -EBADF; + else + err = flock_lock_file(filp, &file_lock, + (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); + unlock_kernel(); + return err; } /* Report the first existing lock that would conflict with l. 
@@ -260,58 +290,57 @@ asmlinkage int sys_flock(unsigned int fd, unsigned int cmd) */ int fcntl_getlk(unsigned int fd, struct flock *l) { - int error; struct flock flock; struct file *filp; struct file_lock *fl,file_lock; + int error; if ((fd >= NR_OPEN) || !(filp = current->files->fd[fd])) return (-EBADF); - error = verify_area(VERIFY_WRITE, l, sizeof(*l)); - if (error) - return (error); + if (copy_from_user(&flock, l, sizeof(flock))) + return -EFAULT; - copy_from_user(&flock, l, sizeof(flock)); - if ((flock.l_type == F_UNLCK) || (flock.l_type == F_EXLCK) || - (flock.l_type == F_SHLCK)) + if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) return (-EINVAL); if (!filp->f_inode || !posix_make_lock(filp, &file_lock, &flock)) return (-EINVAL); - if ((fl = filp->f_inode->i_flock) && (fl->fl_flags & FL_POSIX)) { - while (fl != NULL) { - if (posix_locks_conflict(&file_lock, fl)) { - flock.l_pid = fl->fl_owner->pid; - flock.l_start = fl->fl_start; - flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : - fl->fl_end - fl->fl_start + 1; - flock.l_whence = 0; - flock.l_type = fl->fl_type; - copy_to_user(l, &flock, sizeof(flock)); - return (0); - } - fl = fl->fl_next; - } + if (filp->f_op->lock) { + error = filp->f_op->lock(filp->f_inode, filp, + F_GETLK, &file_lock); + if (error < 0) + return error; + fl = &file_lock; + } else { + fl = posix_test_lock(filp, &file_lock); } - - flock.l_type = F_UNLCK; /* no conflict found */ - copy_to_user(l, &flock, sizeof(flock)); - return (0); + + if (fl != NULL) { + flock.l_pid = fl->fl_pid; + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : + fl->fl_end - fl->fl_start + 1; + flock.l_whence = 0; + flock.l_type = fl->fl_type; + return copy_to_user(l, &flock, sizeof(flock)) ? -EFAULT : 0; + } else { + flock.l_type = F_UNLCK; + } + + return copy_to_user(l, &flock, sizeof(flock)) ? -EFAULT : 0; } /* Apply the lock described by l to an open file descriptor. 
* This implements both the F_SETLK and F_SETLKW commands of fcntl(). - * It also emulates flock() in a pretty broken way for older C - * libraries. */ int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) { - int error; struct file *filp; struct file_lock file_lock; struct flock flock; struct inode *inode; + int error; /* Get arguments and validate them ... */ @@ -319,10 +348,6 @@ int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) if ((fd >= NR_OPEN) || !(filp = current->files->fd[fd])) return (-EBADF); - error = verify_area(VERIFY_READ, l, sizeof(*l)); - if (error) - return (error); - if (!(inode = filp->f_inode)) return (-EINVAL); @@ -340,22 +365,25 @@ int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) } while (vma != inode->i_mmap); } - copy_from_user(&flock, l, sizeof(flock)); + if (copy_from_user(&flock, l, sizeof(flock))) + return -EFAULT; if (!posix_make_lock(filp, &file_lock, &flock)) return (-EINVAL); switch (flock.l_type) { - case F_RDLCK : + case F_RDLCK: if (!(filp->f_mode & 1)) return (-EBADF); break; - case F_WRLCK : + case F_WRLCK: if (!(filp->f_mode & 2)) return (-EBADF); break; - case F_SHLCK : - case F_EXLCK : -#if 1 + case F_UNLCK: + break; + case F_SHLCK: + case F_EXLCK: +#ifdef __sparc__ /* warn a bit for now, but don't overdo it */ { static int count = 0; @@ -366,16 +394,20 @@ int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) current->pid, current->comm); } } -#endif if (!(filp->f_mode & 3)) return (-EBADF); break; - case F_UNLCK : - break; +#endif default: return -EINVAL; } - + + if (filp->f_op->lock != NULL) { + error = filp->f_op->lock(filp->f_inode, filp, cmd, &file_lock); + if (error < 0) + return error; + } + return (posix_lock_file(filp, &file_lock, cmd == F_SETLKW)); } @@ -383,48 +415,49 @@ int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) */ void locks_remove_locks(struct task_struct *task, struct file *filp) { - struct file_lock *fl; + struct 
file_lock file_lock, *fl; + struct file_lock **before; /* For POSIX locks we free all locks on this file for the given task. * For FLOCK we only free locks on this *open* file if it is the last * close on that file. */ - if ((fl = filp->f_inode->i_flock) != NULL) { - if (fl->fl_flags & FL_POSIX) - posix_remove_locks(&filp->f_inode->i_flock, task); - else - flock_remove_locks(&filp->f_inode->i_flock, filp); - } - - return; -} - -static void posix_remove_locks(struct file_lock **before, struct task_struct *task) -{ - struct file_lock *fl; + before = &filp->f_inode->i_flock; while ((fl = *before) != NULL) { - if (fl->fl_owner == task) + if (((fl->fl_flags & FL_POSIX) && (fl->fl_owner == task)) || + ((fl->fl_flags & FL_FLOCK) && (fl->fl_file == filp) && + (filp->f_count == 1))) { + file_lock = *fl; locks_delete_lock(before, 0); - else + if (filp->f_op->lock) { + file_lock.fl_type = F_UNLCK; + filp->f_op->lock(filp->f_inode, filp, + F_SETLK, &file_lock); + /* List may have changed: */ + before = &filp->f_inode->i_flock; + } + } else { before = &fl->fl_next; + } } return; } -static void flock_remove_locks(struct file_lock **before, struct file *filp) +struct file_lock * +posix_test_lock(struct file *filp, struct file_lock *fl) { - struct file_lock *fl; + struct file_lock *cfl; - while ((fl = *before) != NULL) { - if ((fl->fl_file == filp) && (filp->f_count == 1)) - locks_delete_lock(before, 0); - else - before = &fl->fl_next; + for (cfl = filp->f_inode->i_flock; cfl; cfl = cfl->fl_next) { + if (!(cfl->fl_flags & FL_POSIX)) + continue; + if (posix_locks_conflict(cfl, fl)) + break; } - return; + return cfl; } int locks_verify_locked(struct inode *inode) @@ -457,13 +490,11 @@ int locks_mandatory_locked(struct inode *inode) /* Search the lock list for this inode for any POSIX locks. 
*/ - if ((fl = inode->i_flock) == NULL || (fl->fl_flags & FL_FLOCK)) - return (0); - - while (fl != NULL) { + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; if (fl->fl_owner != current) return (-EAGAIN); - fl = fl->fl_next; } return (0); } @@ -475,29 +506,23 @@ int locks_mandatory_area(int read_write, struct inode *inode, struct file_lock *fl; struct file_lock tfl; + memset(&tfl, 0, sizeof(tfl)); + tfl.fl_file = filp; - tfl.fl_nextlink = NULL; - tfl.fl_prevlink = NULL; - tfl.fl_next = NULL; - tfl.fl_nextblock = NULL; - tfl.fl_prevblock = NULL; tfl.fl_flags = FL_POSIX | FL_ACCESS; tfl.fl_owner = current; - tfl.fl_wait = NULL; + tfl.fl_pid = current->pid; tfl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; tfl.fl_start = offset; tfl.fl_end = offset + count - 1; repeat: - /* Check that there are locks, and that they're not FL_FLOCK locks. - */ - if ((fl = inode->i_flock) == NULL || (fl->fl_flags & FL_FLOCK)) - return (0); - /* Search the lock list for this inode for locks that conflict with * the proposed read/write. */ - while (fl != NULL) { + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; /* Block for writes against a "read" lock, * and both reads and writes against a "write" lock. 
*/ @@ -506,7 +531,7 @@ repeat: return (-EAGAIN); if (current->signal & ~current->blocked) return (-ERESTARTSYS); - if (posix_locks_deadlock(current, fl->fl_owner)) + if (posix_locks_deadlock(&tfl, fl)) return (-EDEADLK); locks_insert_block(fl, &tfl); @@ -522,7 +547,6 @@ repeat: break; goto repeat; } - fl = fl->fl_next; } return (0); } @@ -535,37 +559,31 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl, { off_t start; + memset(fl, 0, sizeof(*fl)); + fl->fl_flags = FL_POSIX; switch (l->l_type) { - case F_RDLCK : - case F_WRLCK : - case F_UNLCK : + case F_RDLCK: + case F_WRLCK: + case F_UNLCK: fl->fl_type = l->l_type; break; - case F_SHLCK : - fl->fl_type = F_RDLCK; - fl->fl_flags |= FL_BROKEN; - break; - case F_EXLCK : - fl->fl_type = F_WRLCK; - fl->fl_flags |= FL_BROKEN; - break; - default : + default: return (0); } switch (l->l_whence) { - case 0 : /*SEEK_SET*/ + case 0: /*SEEK_SET*/ start = 0; break; - case 1 : /*SEEK_CUR*/ + case 1: /*SEEK_CUR*/ start = filp->f_pos; break; - case 2 : /*SEEK_END*/ + case 2: /*SEEK_END*/ start = filp->f_inode->i_size; break; - default : + default: return (0); } @@ -577,7 +595,7 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl, fl->fl_file = filp; fl->fl_owner = current; - fl->fl_wait = NULL; /* just for cleanliness */ + fl->fl_pid = current->pid; return (1); } @@ -588,20 +606,22 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl, static int flock_make_lock(struct file *filp, struct file_lock *fl, unsigned int cmd) { + memset(fl, 0, sizeof(*fl)); + if (!filp->f_inode) /* just in case */ return (0); switch (cmd & ~LOCK_NB) { - case LOCK_SH : + case LOCK_SH: fl->fl_type = F_RDLCK; break; - case LOCK_EX : + case LOCK_EX: fl->fl_type = F_WRLCK; break; - case LOCK_UN : + case LOCK_UN: fl->fl_type = F_UNLCK; break; - default : + default: return (0); } @@ -610,7 +630,6 @@ static int flock_make_lock(struct file *filp, struct file_lock *fl, fl->fl_end = OFFSET_MAX; fl->fl_file = 
filp; fl->fl_owner = NULL; - fl->fl_wait = NULL; /* just for cleanliness */ return (1); } @@ -623,7 +642,8 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* POSIX locks owned by the same process do not conflict with * each other. */ - if (caller_fl->fl_owner == sys_fl->fl_owner) + if (!(sys_fl->fl_flags & FL_POSIX) || + locks_same_owner(caller_fl, sys_fl)) return (0); return (locks_conflict(caller_fl, sys_fl)); @@ -637,7 +657,8 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* FLOCK locks referring to the same filp do not conflict with * each other. */ - if (caller_fl->fl_file == sys_fl->fl_file) + if (!(sys_fl->fl_flags & FL_FLOCK) || + (caller_fl->fl_file == sys_fl->fl_file)) return (0); return (locks_conflict(caller_fl, sys_fl)); @@ -652,10 +673,10 @@ static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) return (0); switch (caller_fl->fl_type) { - case F_RDLCK : + case F_RDLCK: return (sys_fl->fl_type == F_WRLCK); - case F_WRLCK : + case F_WRLCK: return (1); default: @@ -675,25 +696,39 @@ static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) * at a time. When we find blocked_task on a wait queue we can re-search * with blocked_task equal to that queue's owner, until either blocked_task * isn't found, or blocked_task is found on a queue owned by my_task. + * + * Note: the above assumption may not be true when handling lock requests + * from a broken NFS client. But broken NFS clients have a lot more to + * worry about than proper deadlock detection anyway... 
--okir */ -static int posix_locks_deadlock(struct task_struct *my_task, - struct task_struct *blocked_task) +static int posix_locks_deadlock(struct file_lock *caller_fl, + struct file_lock *block_fl) { struct file_lock *fl; struct file_lock *bfl; + void *caller_owner, *blocked_owner; + unsigned int caller_pid, blocked_pid; + + caller_owner = caller_fl->fl_owner; + caller_pid = caller_fl->fl_pid; + blocked_owner = block_fl->fl_owner; + blocked_pid = block_fl->fl_pid; next_task: - if (my_task == blocked_task) + if (caller_owner == blocked_owner && caller_pid == blocked_pid) return (1); for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) { if (fl->fl_owner == NULL || fl->fl_nextblock == NULL) continue; for (bfl = fl->fl_nextblock; bfl != fl; bfl = bfl->fl_nextblock) { - if (bfl->fl_owner == blocked_task) { - if (fl->fl_owner == my_task) { + if (bfl->fl_owner == blocked_owner && + bfl->fl_pid == blocked_pid) { + if (fl->fl_owner == caller_owner && + fl->fl_pid == caller_pid) { return (1); } - blocked_task = fl->fl_owner; + blocked_owner = fl->fl_owner; + blocked_pid = fl->fl_pid; goto next_task; } } @@ -701,8 +736,9 @@ next_task: return (0); } -/* Try to create a FLOCK lock on filp. We always insert new locks at - * the head of the list. +/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks at + * the head of the list, but that's secret knowledge known only to the next + * two functions. 
*/ static int flock_lock_file(struct file *filp, struct file_lock *caller, unsigned int wait) @@ -713,11 +749,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *caller, int change = 0; before = &filp->f_inode->i_flock; - - if ((fl = *before) && (fl->fl_flags & FL_POSIX)) - return (-EBUSY); - - while ((fl = *before) != NULL) { + while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) { if (caller->fl_file == fl->fl_file) { if (caller->fl_type == fl->fl_type) return (0); @@ -736,39 +768,34 @@ static int flock_lock_file(struct file *filp, struct file_lock *caller, if ((new_fl = locks_alloc_lock(caller)) == NULL) return (-ENOLCK); repeat: - if ((fl = filp->f_inode->i_flock) && (fl->fl_flags & FL_POSIX)) { - locks_free_lock(new_fl); - return (-EBUSY); - } - - while (fl != NULL) { - if (flock_locks_conflict(new_fl, fl)) { - if (!wait) { - locks_free_lock(new_fl); - return (-EAGAIN); - } - if (current->signal & ~current->blocked) { - /* Note: new_fl is not in any queue at this - * point, so we must use locks_free_lock() - * instead of locks_delete_lock() - * Dmitry Gorodchanin 09/02/96. - */ - locks_free_lock(new_fl); - return (-ERESTARTSYS); - } - locks_insert_block(fl, new_fl); - interruptible_sleep_on(&new_fl->fl_wait); - locks_delete_block(fl, new_fl); - if (current->signal & ~current->blocked) { - /* Awakened by a signal. Free the new - * lock and return an error. - */ - locks_free_lock(new_fl); - return (-ERESTARTSYS); - } - goto repeat; + for (fl = filp->f_inode->i_flock; (fl != NULL) && (fl->fl_flags & FL_FLOCK); + fl = fl->fl_next) { + if (!flock_locks_conflict(new_fl, fl)) + continue; + if (!wait) { + locks_free_lock(new_fl); + return (-EAGAIN); + } + if (current->signal & ~current->blocked) { + /* Note: new_fl is not in any queue at this + * point, so we must use locks_free_lock() + * instead of locks_delete_lock() + * Dmitry Gorodchanin 09/02/96. 
+ */ + locks_free_lock(new_fl); + return (-ERESTARTSYS); + } + locks_insert_block(fl, new_fl); + interruptible_sleep_on(&new_fl->fl_wait); + locks_delete_block(fl, new_fl); + if (current->signal & ~current->blocked) { + /* Awakened by a signal. Free the new + * lock and return an error. + */ + locks_free_lock(new_fl); + return (-ERESTARTSYS); } - fl = fl->fl_next; + goto repeat; } locks_insert_lock(&filp->f_inode->i_flock, new_fl); return (0); @@ -786,7 +813,7 @@ repeat: * To all purists: Yes, I use a few goto's. Just pass on to the next function. */ -static int posix_lock_file(struct file *filp, struct file_lock *caller, +int posix_lock_file(struct file *filp, struct file_lock *caller, unsigned int wait) { struct file_lock *fl; @@ -796,27 +823,25 @@ static int posix_lock_file(struct file *filp, struct file_lock *caller, struct file_lock **before; int added = 0; -repeat: - if ((fl = filp->f_inode->i_flock) && (fl->fl_flags & FL_FLOCK)) - return (-EBUSY); - if (caller->fl_type != F_UNLCK) { - while (fl != NULL) { - if (posix_locks_conflict(caller, fl)) { - if (!wait) - return (-EAGAIN); - if (current->signal & ~current->blocked) - return (-ERESTARTSYS); - if (posix_locks_deadlock(caller->fl_owner, fl->fl_owner)) - return (-EDEADLK); - locks_insert_block(fl, caller); - interruptible_sleep_on(&caller->fl_wait); - locks_delete_block(fl, caller); - if (current->signal & ~current->blocked) - return (-ERESTARTSYS); - goto repeat; - } - fl = fl->fl_next; + repeat: + for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (!posix_locks_conflict(caller, fl)) + continue; + if (!wait) + return (-EAGAIN); + if (current->signal & ~current->blocked) + return (-ERESTARTSYS); + if (posix_locks_deadlock(caller, fl)) + return (-EDEADLK); + locks_insert_block(fl, caller); + interruptible_sleep_on(&caller->fl_wait); + locks_delete_block(fl, caller); + if (current->signal & ~current->blocked) + return (-ERESTARTSYS); + goto 
repeat; } } @@ -827,13 +852,14 @@ repeat: /* First skip locks owned by other processes. */ - while ((fl = *before) && (caller->fl_owner != fl->fl_owner)) { + while ((fl = *before) && (!(fl->fl_flags & FL_POSIX) || + !locks_same_owner(caller, fl))) { before = &fl->fl_next; } /* Process locks with this owner. */ - while ((fl = *before) && (caller->fl_owner == fl->fl_owner)) { + while ((fl = *before) && locks_same_owner(caller, fl)) { /* Detect adjacent or overlapping regions (if same lock type) */ if (caller->fl_type == fl->fl_type) { @@ -901,6 +927,7 @@ repeat: fl->fl_start = caller->fl_start; fl->fl_end = caller->fl_end; fl->fl_type = caller->fl_type; + fl->fl_u = caller->fl_u; caller = fl; added = 1; } @@ -954,18 +981,17 @@ static struct file_lock *locks_alloc_lock(struct file_lock *fl) GFP_ATOMIC)) == NULL) return (tmp); - tmp->fl_nextlink = NULL; - tmp->fl_prevlink = NULL; - tmp->fl_next = NULL; - tmp->fl_nextblock = NULL; - tmp->fl_prevblock = NULL; + memset(tmp, 0, sizeof(*tmp)); + tmp->fl_flags = fl->fl_flags; tmp->fl_owner = fl->fl_owner; + tmp->fl_pid = fl->fl_pid; tmp->fl_file = fl->fl_file; - tmp->fl_wait = NULL; tmp->fl_type = fl->fl_type; tmp->fl_start = fl->fl_start; tmp->fl_end = fl->fl_end; + tmp->fl_notify = fl->fl_notify; + tmp->fl_u = fl->fl_u; return (tmp); } @@ -1027,8 +1053,7 @@ static char *lock_get_status(struct file_lock *fl, char *p, int id, char *pfx) p += sprintf(p, "%d:%s ", id, pfx); if (fl->fl_flags & FL_POSIX) { p += sprintf(p, "%s %s ", - (fl->fl_flags & FL_ACCESS) ? "ACCESS" : - ((fl->fl_flags & FL_BROKEN) ? "BROKEN" : "POSIX "), + (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX", (IS_MANDLOCK(inode) && (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ? "MANDATORY" : "ADVISORY "); @@ -1038,7 +1063,7 @@ static char *lock_get_status(struct file_lock *fl, char *p, int id, char *pfx) } p += sprintf(p, "%s ", (fl->fl_type == F_RDLCK) ? "READ " : "WRITE"); p += sprintf(p, "%d %s:%ld %ld %ld ", - fl->fl_owner ? 
fl->fl_owner->pid : 0, + fl->fl_pid, kdevname(inode->i_dev), inode->i_ino, fl->fl_start, fl->fl_end); p += sprintf(p, "%08lx %08lx %08lx %08lx %08lx\n", @@ -1063,6 +1088,6 @@ int get_locks_status(char *buf) p = lock_get_status(bfl, p, i, " ->"); } while ((bfl = bfl->fl_nextblock) != fl); } - return (p - buf); + return (p - buf); } diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 06d8fa907..2acbe5c17 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -4,6 +4,11 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ +/* + * Modified for 680x0 by Hamish Macdonald + * Fixed for 680x0 by Andreas Schwab + */ + /* bitmap.c contains the code that handles the inode and block bitmaps */ #include <linux/sched.h> @@ -57,7 +62,7 @@ void minix_free_block(struct super_block * sb, int block) printk("minix_free_block: nonexistent bitmap buffer\n"); return; } - if (!clear_bit(bit,bh->b_data)) + if (!minix_clear_bit(bit,bh->b_data)) printk("free_block (%s:%d): bit already cleared\n", kdevname(sb->s_dev), block); mark_buffer_dirty(bh, 1); @@ -77,11 +82,11 @@ repeat: j = 8192; for (i=0 ; i<64 ; i++) if ((bh=sb->u.minix_sb.s_zmap[i]) != NULL) - if ((j=find_first_zero_bit(bh->b_data, 8192)) < 8192) + if ((j=minix_find_first_zero_bit(bh->b_data, 8192)) < 8192) break; if (i>=64 || !bh || j>=8192) return 0; - if (set_bit(j,bh->b_data)) { + if (minix_set_bit(j,bh->b_data)) { printk("new_block: bit already set"); goto repeat; } @@ -209,7 +214,7 @@ void minix_free_inode(struct inode * inode) } minix_clear_inode(inode); clear_inode(inode); - if (!clear_bit(ino & 8191, bh->b_data)) + if (!minix_clear_bit(ino & 8191, bh->b_data)) printk("free_inode: bit %lu already cleared.\n",ino); mark_buffer_dirty(bh, 1); } @@ -229,13 +234,13 @@ struct inode * minix_new_inode(const struct inode * dir) j = 8192; for (i=0 ; i<8 ; i++) if ((bh = inode->i_sb->u.minix_sb.s_imap[i]) != NULL) - if ((j=find_first_zero_bit(bh->b_data, 8192)) < 8192) + if ((j=minix_find_first_zero_bit(bh->b_data, 8192)) < 8192) 
break; if (!bh || j >= 8192) { iput(inode); return NULL; } - if (set_bit(j,bh->b_data)) { /* shouldn't happen */ + if (minix_set_bit(j,bh->b_data)) { /* shouldn't happen */ printk("new_inode: bit already set"); iput(inode); return NULL; diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 851d1f7da..ec5113c4a 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -27,7 +27,7 @@ static struct file_operations minix_dir_operations = { minix_dir_read, /* read */ NULL, /* write - bad */ minix_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/minix/file.c b/fs/minix/file.c index 009bd09ed..23aa70268 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -38,7 +38,7 @@ static struct file_operations minix_file_operations = { generic_file_read, /* read */ minix_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ generic_file_mmap, /* mmap */ NULL, /* no special open is needed */ diff --git a/fs/minix/inode.c b/fs/minix/inode.c index e6fe65f48..faf5ce8a4 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -5,6 +5,8 @@ * * Copyright (C) 1996 Gertjan van Wingerde (gertjan@cs.vu.nl) * Minix V2 fs support. 
+ * + * Modified for 680x0 by Andreas Schwab */ #include <linux/module.h> @@ -16,6 +18,7 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/locks.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -72,7 +75,7 @@ void minix_put_super(struct super_block *sb) return; } -static struct super_operations minix_sops = { +static struct super_operations minix_sops = { minix_read_inode, NULL, minix_write_inode, @@ -161,7 +164,7 @@ static const char * minix_checkroot(struct super_block *s) return errmsg; } -struct super_block *minix_read_super(struct super_block *s,void *data, +struct super_block *minix_read_super(struct super_block *s,void *data, int silent) { struct buffer_head *bh; @@ -263,8 +266,8 @@ struct super_block *minix_read_super(struct super_block *s,void *data, MOD_DEC_USE_COUNT; return NULL; } - set_bit(0,s->u.minix_sb.s_imap[0]->b_data); - set_bit(0,s->u.minix_sb.s_zmap[0]->b_data); + minix_set_bit(0,s->u.minix_sb.s_imap[0]->b_data); + minix_set_bit(0,s->u.minix_sb.s_zmap[0]->b_data); unlock_super(s); /* set up enough so that it can read an inode */ s->s_dev = dev; @@ -439,7 +442,7 @@ int minix_bmap(struct inode * inode,int block) /* * The minix V1 fs getblk functions. 
*/ -static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr, +static struct buffer_head * V1_inode_getblk(struct inode * inode, int nr, int create) { int tmp; @@ -473,7 +476,7 @@ repeat: return result; } -static struct buffer_head * V1_block_getblk(struct inode * inode, +static struct buffer_head * V1_block_getblk(struct inode * inode, struct buffer_head * bh, int nr, int create) { int tmp; @@ -523,7 +526,7 @@ repeat: return result; } -static struct buffer_head * V1_minix_getblk(struct inode * inode, int block, +static struct buffer_head * V1_minix_getblk(struct inode * inode, int block, int create) { struct buffer_head * bh; @@ -552,7 +555,7 @@ static struct buffer_head * V1_minix_getblk(struct inode * inode, int block, /* * The minix V2 fs getblk functions. */ -static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr, +static struct buffer_head * V2_inode_getblk(struct inode * inode, int nr, int create) { int tmp; @@ -586,7 +589,7 @@ repeat: return result; } -static struct buffer_head * V2_block_getblk(struct inode * inode, +static struct buffer_head * V2_block_getblk(struct inode * inode, struct buffer_head * bh, int nr, int create) { int tmp; @@ -636,7 +639,7 @@ repeat: return result; } -static struct buffer_head * V2_minix_getblk(struct inode * inode, int block, +static struct buffer_head * V2_minix_getblk(struct inode * inode, int block, int create) { struct buffer_head * bh; @@ -944,19 +947,17 @@ static struct file_system_type minix_fs_type = { minix_read_super, "minix", 1, NULL }; -int init_minix_fs(void) +__initfunc(int init_minix_fs(void)) { return register_filesystem(&minix_fs_type); } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - - if ((status = init_minix_fs()) == 0) - register_symtab(0); - return status; + return init_minix_fs(); } void cleanup_module(void) diff --git a/fs/msdos/msdosfs_syms.c b/fs/msdos/msdosfs_syms.c index 2621fbfcc..914a178a9 100644 --- a/fs/msdos/msdosfs_syms.c +++ 
b/fs/msdos/msdosfs_syms.c @@ -5,43 +5,36 @@ * These symbols are used by umsdos. */ +#include <linux/config.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/msdos_fs.h> +#include <linux/init.h> -static struct symbol_table msdos_syms = { -#include <linux/symtab_begin.h> - /* - * Support for umsdos fs - * - * These symbols are _always_ exported, in case someone - * wants to install the umsdos module later. - */ - X(msdos_create), - X(msdos_lookup), - X(msdos_mkdir), - X(msdos_read_inode), - X(msdos_rename), - X(msdos_rmdir), - X(msdos_unlink), - X(msdos_unlink_umsdos), - X(msdos_read_super), - X(msdos_put_super), -#include <linux/symtab_end.h> -}; - -struct file_system_type msdos_fs_type = { +/* + * Support for umsdos fs + * + * These symbols are _always_ exported, in case someone + * wants to install the umsdos module later. + */ +EXPORT_SYMBOL(msdos_create); +EXPORT_SYMBOL(msdos_lookup); +EXPORT_SYMBOL(msdos_mkdir); +EXPORT_SYMBOL(msdos_read_inode); +EXPORT_SYMBOL(msdos_rename); +EXPORT_SYMBOL(msdos_rmdir); +EXPORT_SYMBOL(msdos_unlink); +EXPORT_SYMBOL(msdos_unlink_umsdos); +EXPORT_SYMBOL(msdos_read_super); +EXPORT_SYMBOL(msdos_put_super); + + +static struct file_system_type msdos_fs_type = { msdos_read_super, "msdos", 1, NULL }; - -int init_msdos_fs(void) +__initfunc(int init_msdos_fs(void)) { - int status; - - if ((status = register_filesystem(&msdos_fs_type)) == 0) - status = register_symtab(&msdos_syms); - return status; + return register_filesystem(&msdos_fs_type); } - diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index e34e5de5b..1c76bdc41 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -5,6 +5,8 @@ * Hidden files 1995 by Albert Cahalan <albert@ccs.neu.edu> <adc@coe.neu.edu> */ +#include <linux/config.h> + #define __NO_VERSION__ #include <linux/module.h> @@ -25,17 +27,23 @@ /* MS-DOS "device special files" */ static const char *reserved_names[] = { +#ifndef CONFIG_ATARI /* GEMDOS is less stupid */ "CON ","PRN ","NUL ","AUX ", "LPT1 
","LPT2 ","LPT3 ","LPT4 ", "COM1 ","COM2 ","COM3 ","COM4 ", +#endif NULL }; /* Characters that are undesirable in an MS-DOS file name */ static char bad_chars[] = "*?<>|\""; +#ifdef CONFIG_ATARI +/* GEMDOS is less restrictive */ +static char bad_if_strict[] = " "; +#else static char bad_if_strict[] = "+=,; "; - +#endif void msdos_put_super(struct super_block *sb) { @@ -96,7 +104,11 @@ static int msdos_format_name(char conv,const char *name,int len, /* Get rid of dot - test for it elsewhere */ name++; len--; } +#ifndef CONFIG_ATARI space = 1; /* disallow names that _really_ start with a dot */ +#else + space = 0; /* GEMDOS does not care */ +#endif c = 0; for (walk = res; len && walk-res < 8; walk++) { c = *name++; @@ -129,8 +141,13 @@ static int msdos_format_name(char conv,const char *name,int len, if (conv != 'r' && strchr(bad_chars,c)) return -EINVAL; if (conv == 's' && strchr(bad_if_strict,c)) return -EINVAL; - if (c < ' ' || c == ':' || c == '\\' || c == '.') + if (c < ' ' || c == ':' || c == '\\') return -EINVAL; + if (c == '.') { + if (conv == 's') + return -EINVAL; + break; + } if (c >= 'A' && c <= 'Z' && conv == 's') return -EINVAL; space = c == ' '; *walk++ = c >= 'a' && c <= 'z' ? c-32 : c; diff --git a/fs/namei.c b/fs/namei.c index 2b456c72f..aeaca8f45 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -15,11 +15,16 @@ #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> +#include <asm/unaligned.h> +#include <asm/namei.h> #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) + /* * In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the @@ -48,25 +53,42 @@ static inline int do_getname(const char *filename, char *page) return retval; } +/* + * This is a single page for faster getname. + * If the page is available when entering getname, use it. 
+ * If the page is not available, call __get_free_page instead. + * This works even though do_getname can block (think about it). + * -- Michael Chastain, based on idea of Linus Torvalds, 1 Dec 1996. + */ +static unsigned long name_page_cache = 0; + int getname(const char * filename, char **result) { unsigned long page; int retval; - page = __get_free_page(GFP_KERNEL); - retval = -ENOMEM; - if (page) { - *result = (char *)page; - retval = do_getname(filename, (char *) page); - if (retval < 0) - free_page(page); + page = name_page_cache; + name_page_cache = 0; + if (!page) { + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; } + + retval = do_getname(filename, (char *) page); + if (retval < 0) + putname( (char *) page ); + else + *result = (char *) page; return retval; } void putname(char * name) { - free_page((unsigned long) name); + if (name_page_cache == 0) + name_page_cache = (unsigned long) name; + else + free_page((unsigned long) name); } /* @@ -145,7 +167,7 @@ int lookup(struct inode * dir,const char * name, int len, return -ENOENT; /* check permissions before traversing mount-points */ perm = permission(dir,MAY_EXEC); - if (len==2 && name[0] == '.' 
&& name[1] == '.') { + if (len==2 && get_unaligned((u16 *) name) == 0x2e2e) { if (dir == current->fs->root) { *result = dir; return 0; @@ -198,7 +220,7 @@ int follow_link(struct inode * dir, struct inode * inode, static int dir_namei(const char *pathname, int *namelen, const char **name, struct inode * base, struct inode **res_inode) { - char c; + unsigned char c; const char * thisname; int len,error; struct inode * inode; @@ -240,13 +262,14 @@ static int dir_namei(const char *pathname, int *namelen, const char **name, return 0; } -static int _namei(const char * pathname, struct inode * base, +int _namei(const char * pathname, struct inode * base, int follow_links, struct inode ** res_inode) { const char *basename; int namelen,error; struct inode * inode; + translate_namei(pathname, base, follow_links, res_inode); *res_inode = NULL; error = dir_namei(pathname, &namelen, &basename, base, &base); if (error) @@ -313,18 +336,15 @@ int namei(const char *pathname, struct inode **res_inode) * which is a lot more logical, and also allows the "no perm" needed * for symlinks (where the permissions are checked later). 
*/ -#ifdef __mips__ -int do_open_namei(const char * pathname, int flag, int mode, - struct inode ** res_inode, struct inode * base) -#else -int open_namei(const char * pathname, int flag, int mode, - struct inode ** res_inode, struct inode * base) -#endif +int +open_namei(const char * pathname, int flag, int mode, + struct inode ** res_inode, struct inode * base) { const char * basename; int namelen,error; struct inode * dir, *inode; + translate_open_namei(pathname, flag, mode, res_inode, base); mode &= S_IALLUGO & ~current->fs->umask; mode |= S_IFREG; error = dir_namei(pathname, &namelen, &basename, base, &dir); @@ -486,8 +506,11 @@ asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev) int error; char * tmp; + lock_kernel(); + error = -EPERM; if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !fsuser())) - return -EPERM; + goto out; + error = -EINVAL; switch (mode & S_IFMT) { case 0: mode |= S_IFREG; @@ -495,13 +518,15 @@ asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev) case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: break; default: - return -EINVAL; + goto out; } error = getname(filename,&tmp); if (!error) { error = do_mknod(tmp,mode,dev); putname(tmp); } +out: + unlock_kernel(); return error; } @@ -582,12 +607,14 @@ asmlinkage int sys_mkdir(const char * pathname, int mode) int error; char * tmp; + lock_kernel(); error = getname(pathname,&tmp); if (!error) { remove_trailing_slashes(tmp); error = do_mkdir(tmp,mode); putname(tmp); } + unlock_kernel(); return error; } @@ -625,7 +652,10 @@ static int do_rmdir(const char * name) } if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); - return dir->i_op->rmdir(dir,basename,namelen); + down(&dir->i_sem); + error = dir->i_op->rmdir(dir,basename,namelen); + up(&dir->i_sem); + return error; } asmlinkage int sys_rmdir(const char * pathname) @@ -633,12 +663,14 @@ asmlinkage int sys_rmdir(const char * pathname) int error; char * tmp; + lock_kernel(); error = 
getname(pathname,&tmp); if (!error) { remove_trailing_slashes(tmp); error = do_rmdir(tmp); putname(tmp); } + unlock_kernel(); return error; } @@ -676,7 +708,10 @@ static int do_unlink(const char * name) } if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); - return dir->i_op->unlink(dir,basename,namelen); + down(&dir->i_sem); + error = dir->i_op->unlink(dir,basename,namelen); + up(&dir->i_sem); + return error; } asmlinkage int sys_unlink(const char * pathname) @@ -684,11 +719,13 @@ asmlinkage int sys_unlink(const char * pathname) int error; char * tmp; + lock_kernel(); error = getname(pathname,&tmp); if (!error) { error = do_unlink(tmp); putname(tmp); } + unlock_kernel(); return error; } @@ -732,6 +769,7 @@ asmlinkage int sys_symlink(const char * oldname, const char * newname) int error; char * from, * to; + lock_kernel(); error = getname(oldname,&from); if (!error) { error = getname(newname,&to); @@ -741,6 +779,7 @@ asmlinkage int sys_symlink(const char * oldname, const char * newname) } putname(from); } + unlock_kernel(); return error; } @@ -804,16 +843,19 @@ asmlinkage int sys_link(const char * oldname, const char * newname) char * to; struct inode * oldinode; + lock_kernel(); error = lnamei(oldname, &oldinode); if (error) - return error; + goto out; error = getname(newname,&to); if (error) { iput(oldinode); - return error; + goto out; } error = do_link(oldinode,to); putname(to); +out: + unlock_kernel(); return error; } @@ -892,6 +934,7 @@ asmlinkage int sys_rename(const char * oldname, const char * newname) int error; char * from, * to; + lock_kernel(); error = getname(oldname,&from); if (!error) { error = getname(newname,&to); @@ -903,5 +946,6 @@ asmlinkage int sys_rename(const char * oldname, const char * newname) } putname(from); } + unlock_kernel(); return error; } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index ede71765f..e62e26e47 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -2,6 +2,7 @@ * dir.c * * Copyright (C) 1995, 1996 
by Volker Lendecke + * Modified for big endian by J.F. Chadima and David S. Miller * */ @@ -14,105 +15,100 @@ #include <linux/mm.h> #include <linux/ncp_fs.h> #include <asm/uaccess.h> +#include <asm/byteorder.h> #include <linux/errno.h> #include <linux/locks.h> #include "ncplib_kernel.h" + struct ncp_dirent { struct nw_info_struct i; - struct nw_search_sequence s; /* given back for i */ + struct nw_search_sequence s; /* given back for i */ unsigned long f_pos; }; static long -ncp_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count); + ncp_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count); -static int -ncp_readdir(struct inode *inode, struct file *filp, - void *dirent, filldir_t filldir); +static int + ncp_readdir(struct inode *inode, struct file *filp, + void *dirent, filldir_t filldir); static int -ncp_read_volume_list(struct ncp_server *server, int start_with, - int cache_size); + ncp_read_volume_list(struct ncp_server *server, int start_with, + int cache_size); static int -ncp_do_readdir(struct ncp_server *server, struct inode *dir, int fpos, - int cache_size, struct ncp_dirent *entry); + ncp_do_readdir(struct ncp_server *server, struct inode *dir, int fpos, + int cache_size, struct ncp_dirent *entry); static struct inode * -ncp_iget(struct inode *dir, struct nw_file_info *finfo); + ncp_iget(struct inode *dir, struct nw_file_info *finfo); static struct ncp_inode_info * -ncp_find_dir_inode(struct inode *dir, const char *name); + ncp_find_dir_inode(struct inode *dir, const char *name); static int -ncp_lookup(struct inode *dir, const char *__name, - int len, struct inode **result); + ncp_lookup(struct inode *dir, const char *__name, + int len, struct inode **result); -static int -ncp_create(struct inode *dir, const char *name, int len, int mode, - struct inode **result); +static int + ncp_create(struct inode *dir, const char *name, int len, int mode, + struct inode **result); -static int -ncp_mkdir(struct 
inode *dir, const char *name, int len, int mode); +static int + ncp_mkdir(struct inode *dir, const char *name, int len, int mode); -static int -ncp_rmdir(struct inode *dir, const char *name, int len); +static int + ncp_rmdir(struct inode *dir, const char *name, int len); static int -ncp_unlink(struct inode *dir, const char *name, int len); + ncp_unlink(struct inode *dir, const char *name, int len); static int -ncp_rename(struct inode *old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir); + ncp_rename(struct inode *old_dir, const char *old_name, int old_len, + struct inode *new_dir, const char *new_name, int new_len, + int must_be_dir); -static inline void -str_upper(char *name) +static inline void str_upper(char *name) { - while (*name) - { - if (*name >= 'a' && *name <= 'z') - { + while (*name) { + if (*name >= 'a' && *name <= 'z') { *name -= ('a' - 'A'); } name++; } } -static inline void -str_lower(char *name) +static inline void str_lower(char *name) { - while (*name) - { - if (*name >= 'A' && *name <= 'Z') - { + while (*name) { + if (*name >= 'A' && *name <= 'Z') { *name += ('a' - 'A'); } - name ++; + name++; } } -static inline int -ncp_namespace(struct inode *i) +static inline int ncp_namespace(struct inode *i) { - struct ncp_server *server = NCP_SERVER(i); + struct ncp_server *server = NCP_SERVER(i); struct nw_info_struct *info = NCP_ISTRUCT(i); return server->name_space[info->volNumber]; } -static inline int -ncp_preserve_case(struct inode *i) +static inline int ncp_preserve_case(struct inode *i) { return (ncp_namespace(i) == NW_NS_OS2); } -static struct file_operations ncp_dir_operations = { - NULL, /* lseek - default */ +static struct file_operations ncp_dir_operations = +{ + NULL, /* lseek - default */ ncp_dir_read, /* read - bad */ NULL, /* write - bad */ ncp_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ ncp_ioctl, /* ioctl */ NULL, /* mmap */ NULL, 
/* no special open code */ @@ -120,23 +116,24 @@ static struct file_operations ncp_dir_operations = { NULL /* fsync */ }; -struct inode_operations ncp_dir_inode_operations = { +struct inode_operations ncp_dir_inode_operations = +{ &ncp_dir_operations, /* default directory file ops */ ncp_create, /* create */ - ncp_lookup, /* lookup */ + ncp_lookup, /* lookup */ NULL, /* link */ - ncp_unlink, /* unlink */ + ncp_unlink, /* unlink */ NULL, /* symlink */ - ncp_mkdir, /* mkdir */ - ncp_rmdir, /* rmdir */ + ncp_mkdir, /* mkdir */ + ncp_rmdir, /* rmdir */ NULL, /* mknod */ - ncp_rename, /* rename */ + ncp_rename, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ NULL, /* bmap */ NULL, /* truncate */ NULL, /* permission */ - NULL /* smap */ + NULL /* smap */ }; @@ -148,223 +145,181 @@ struct inode_operations ncp_dir_inode_operations = { * enable the NFS exportability of a ncpfs-mounted volume. */ -static inline int -ncp_single_volume(struct ncp_server *server) +static inline int ncp_single_volume(struct ncp_server *server) { return (server->m.mounted_vol[0] != '\0'); } inline ino_t -ncp_info_ino(struct ncp_server *server, struct ncp_inode_info *info) + ncp_info_ino(struct ncp_server * server, struct ncp_inode_info * info) { return ncp_single_volume(server) - ? info->finfo.i.dirEntNum : (ino_t)info; + ? 
info->finfo.i.dirEntNum : (ino_t) info; } -static inline int -ncp_is_server_root(struct inode *inode) +static inline int ncp_is_server_root(struct inode *inode) { struct ncp_server *s = NCP_SERVER(inode); - return ( (!ncp_single_volume(s)) + return ((!ncp_single_volume(s)) && (inode->i_ino == ncp_info_ino(s, &(s->root)))); } struct ncp_inode_info * -ncp_find_inode(struct inode *inode) + ncp_find_inode(struct inode *inode) { struct ncp_server *server = NCP_SERVER(inode); - struct ncp_inode_info *root = &(server->root); - struct ncp_inode_info *this = root; + struct ncp_inode_info *root = &(server->root); + struct ncp_inode_info *this = root; ino_t ino = inode->i_ino; - do - { - if (ino == ncp_info_ino(server, this)) - { + do { + if (ino == ncp_info_ino(server, this)) { return this; } this = this->next; - } + } while (this != root); return NULL; } - -static long -ncp_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count) + +static long ncp_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count) { return -EISDIR; } -static kdev_t c_dev = 0; -static unsigned long c_ino = 0; -static int c_size; -static int c_seen_eof; -static int c_last_returned_index; -static struct ncp_dirent* c_entry = NULL; -static int c_lock = 0; +static kdev_t c_dev = 0; +static unsigned long c_ino = 0; +static int c_size; +static int c_seen_eof; +static int c_last_returned_index; +static struct ncp_dirent *c_entry = NULL; +static int c_lock = 0; static struct wait_queue *c_wait = NULL; -static inline void -ncp_lock_dircache(void) +static inline void ncp_lock_dircache(void) { while (c_lock) sleep_on(&c_wait); c_lock = 1; } -static inline void -ncp_unlock_dircache(void) +static inline void ncp_unlock_dircache(void) { c_lock = 0; wake_up(&c_wait); } -static int -ncp_readdir(struct inode *inode, struct file *filp, - void *dirent, filldir_t filldir) +static int ncp_readdir(struct inode *inode, struct file *filp, + void *dirent, filldir_t filldir) { 
int result = 0; int i = 0; - int index = 0; + int index = 0; struct ncp_dirent *entry = NULL; - struct ncp_server *server = NCP_SERVER(inode); + struct ncp_server *server = NCP_SERVER(inode); struct ncp_inode_info *dir = NCP_INOP(inode); - DDPRINTK("ncp_readdir: filp->f_pos = %d\n", (int)filp->f_pos); + DDPRINTK("ncp_readdir: filp->f_pos = %d\n", (int) filp->f_pos); DDPRINTK("ncp_readdir: inode->i_ino = %ld, c_ino = %ld\n", inode->i_ino, c_ino); - if (!inode || !S_ISDIR(inode->i_mode)) - { + if (!inode || !S_ISDIR(inode->i_mode)) { printk("ncp_readdir: inode is NULL or not a directory\n"); return -EBADF; } - - if (!ncp_conn_valid(server)) - { + if (!ncp_conn_valid(server)) { return -EIO; } - ncp_lock_dircache(); - if (c_entry == NULL) - { - i = sizeof (struct ncp_dirent) * NCP_READDIR_CACHE_SIZE; + if (c_entry == NULL) { + i = sizeof(struct ncp_dirent) * NCP_READDIR_CACHE_SIZE; c_entry = (struct ncp_dirent *) vmalloc(i); - if (c_entry == NULL) - { + if (c_entry == NULL) { printk("ncp_readdir: no MEMORY for cache\n"); result = -ENOMEM; goto finished; } } - - if (filp->f_pos == 0) - { - ncp_invalid_dir_cache(inode); - if (filldir(dirent,".",1, filp->f_pos, - ncp_info_ino(server, dir)) < 0) - { + if (filp->f_pos == 0) { + ncp_invalid_dir_cache(inode); + if (filldir(dirent, ".", 1, filp->f_pos, + ncp_info_ino(server, dir)) < 0) { goto finished; } filp->f_pos += 1; - } - - if (filp->f_pos == 1) - { - if (filldir(dirent,"..",2, filp->f_pos, - ncp_info_ino(server, dir->dir)) < 0) - { + } + if (filp->f_pos == 1) { + if (filldir(dirent, "..", 2, filp->f_pos, + ncp_info_ino(server, dir->dir)) < 0) { goto finished; } filp->f_pos += 1; } - - if ((inode->i_dev == c_dev) && (inode->i_ino == c_ino)) - { - for (i = 0; i < c_size; i++) - { - if (filp->f_pos == c_entry[i].f_pos) - { - entry = &c_entry[i]; - c_last_returned_index = i; - index = i; - break; + if ((inode->i_dev == c_dev) && (inode->i_ino == c_ino)) { + for (i = 0; i < c_size; i++) { + if (filp->f_pos == 
c_entry[i].f_pos) { + entry = &c_entry[i]; + c_last_returned_index = i; + index = i; + break; } } - if ((entry == NULL) && c_seen_eof) - { + if ((entry == NULL) && c_seen_eof) { goto finished; } } - - if (entry == NULL) - { + if (entry == NULL) { int entries; DDPRINTK("ncp_readdir: Not found in cache.\n"); - if (ncp_is_server_root(inode)) - { + if (ncp_is_server_root(inode)) { entries = ncp_read_volume_list(server, filp->f_pos, - NCP_READDIR_CACHE_SIZE); + NCP_READDIR_CACHE_SIZE); DPRINTK("ncp_read_volume_list returned %d\n", entries); - } - else - { + } else { entries = ncp_do_readdir(server, inode, filp->f_pos, NCP_READDIR_CACHE_SIZE, c_entry); DPRINTK("ncp_readdir returned %d\n", entries); } - if (entries < 0) - { + if (entries < 0) { c_dev = 0; c_ino = 0; result = entries; goto finished; } - - if (entries > 0) - { - c_seen_eof = (entries < NCP_READDIR_CACHE_SIZE); - c_dev = inode->i_dev; - c_ino = inode->i_ino; + if (entries > 0) { + c_seen_eof = (entries < NCP_READDIR_CACHE_SIZE); + c_dev = inode->i_dev; + c_ino = inode->i_ino; c_size = entries; entry = c_entry; - c_last_returned_index = 0; - index = 0; + c_last_returned_index = 0; + index = 0; - if (!ncp_preserve_case(inode)) - { - for (i = 0; i < c_size; i++) - { + if (!ncp_preserve_case(inode)) { + for (i = 0; i < c_size; i++) { str_lower(c_entry[i].i.entryName); } } } } - - if (entry == NULL) - { - /* Nothing found, even from a ncp call */ + if (entry == NULL) { + /* Nothing found, even from a ncp call */ goto finished; - } - - while (index < c_size) - { + } + while (index < c_size) { ino_t ino; - if (ncp_single_volume(server)) - { - ino = (ino_t)(entry->i.dirEntNum); - } - else - { + if (ncp_single_volume(server)) { + ino = (ino_t) (entry->i.dirEntNum); + } else { /* For getwd() we have to return the correct * inode in d_ino if the inode is currently in * use. 
Otherwise the inode number does not @@ -376,41 +331,36 @@ ncp_readdir(struct inode *inode, struct file *filp, /* Some programs seem to be confused about a * zero inode number, so we set it to one. * Thanks to Gordon Chaffee for this one. */ - if (ino_info == NULL) - { + if (ino_info == NULL) { ino_info = (struct ncp_inode_info *) 1; } - ino = (ino_t)(ino_info); + ino = (ino_t) (ino_info); } DDPRINTK("ncp_readdir: entry->path= %s\n", entry->i.entryName); DDPRINTK("ncp_readdir: entry->f_pos = %ld\n", entry->f_pos); - if (filldir(dirent, entry->i.entryName, entry->i.nameLen, - entry->f_pos, ino) < 0) - { + if (filldir(dirent, entry->i.entryName, entry->i.nameLen, + entry->f_pos, ino) < 0) { break; - } - - if ( (inode->i_dev != c_dev) + } + if ((inode->i_dev != c_dev) || (inode->i_ino != c_ino) - || (entry->f_pos != filp->f_pos)) - { + || (entry->f_pos != filp->f_pos)) { /* Someone has destroyed the cache while we slept in filldir */ break; } - filp->f_pos += 1; - index += 1; - entry += 1; + filp->f_pos += 1; + index += 1; + entry += 1; } - finished: + finished: ncp_unlock_dircache(); return result; } -static int -ncp_read_volume_list(struct ncp_server *server, int fpos, int cache_size) +static int ncp_read_volume_list(struct ncp_server *server, int fpos, int cache_size) { struct ncp_dirent *entry = c_entry; @@ -418,47 +368,35 @@ ncp_read_volume_list(struct ncp_server *server, int fpos, int cache_size) int i; #if 1 - if (fpos < 2) - { + if (fpos < 2) { printk("OOPS, we expect fpos >= 2"); fpos = 2; } #endif - for (i=0; i<NCP_NUMBER_OF_VOLUMES; i++) - { + for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { struct ncp_volume_info info; - if (ncp_get_volume_info_with_number(server, i, &info) != 0) - { + if (ncp_get_volume_info_with_number(server, i, &info) != 0) { return (total_count - fpos); } - - if (strlen(info.volume_name) > 0) - { - if (total_count < fpos) - { + if (strlen(info.volume_name) > 0) { + if (total_count < fpos) { DPRINTK("ncp_read_volumes: skipped vol: %s\n", 
info.volume_name); - } - else if (total_count >= fpos + cache_size) - { + } else if (total_count >= fpos + cache_size) { return (total_count - fpos); - } - else - { + } else { DPRINTK("ncp_read_volumes: found vol: %s\n", info.volume_name); if (ncp_lookup_volume(server, info.volume_name, - &(entry->i)) != 0) - { + &(entry->i)) != 0) { DPRINTK("ncpfs: could not lookup vol " "%s\n", info.volume_name); continue; } - entry->f_pos = total_count; entry += 1; } @@ -468,59 +406,46 @@ ncp_read_volume_list(struct ncp_server *server, int fpos, int cache_size) return (total_count - fpos); } -static int -ncp_do_readdir(struct ncp_server *server, struct inode *dir, int fpos, - int cache_size, struct ncp_dirent *entry) +static int ncp_do_readdir(struct ncp_server *server, struct inode *dir, int fpos, + int cache_size, struct ncp_dirent *entry) { static struct nw_search_sequence seq; static struct inode *last_dir; static int total_count; #if 1 - if (fpos < 2) - { + if (fpos < 2) { printk("OOPS, we expect fpos >= 2"); fpos = 2; } #endif DPRINTK("ncp_do_readdir: fpos = %d\n", fpos); - if (fpos == 2) - { + if (fpos == 2) { last_dir = NULL; total_count = 2; } - - if ((fpos != total_count) || (dir != last_dir)) - { + if ((fpos != total_count) || (dir != last_dir)) { total_count = 2; last_dir = dir; DPRINTK("ncp_do_readdir: re-used seq for %s\n", NCP_ISTRUCT(dir)->entryName); - if (ncp_initialize_search(server, NCP_ISTRUCT(dir), &seq)!=0) - { + if (ncp_initialize_search(server, NCP_ISTRUCT(dir), &seq) != 0) { DPRINTK("ncp_init_search failed\n"); return total_count - fpos; } } - - while (total_count < fpos + cache_size) - { + while (total_count < fpos + cache_size) { if (ncp_search_for_file_or_subdir(server, &seq, - &(entry->i)) != 0) - { + &(entry->i)) != 0) { return total_count - fpos; } - - if (total_count < fpos) - { + if (total_count < fpos) { DPRINTK("ncp_do_readdir: skipped file: %s\n", entry->i.entryName); - } - else - { + } else { DDPRINTK("ncp_do_r: file: %s, 
f_pos=%d,total_count=%d", entry->i.entryName, fpos, total_count); entry->s = seq; @@ -532,168 +457,150 @@ ncp_do_readdir(struct ncp_server *server, struct inode *dir, int fpos, return (total_count - fpos); } -void -ncp_init_dir_cache(void) +void ncp_init_dir_cache(void) { - c_dev = 0; - c_ino = 0; - c_entry = NULL; + c_dev = 0; + c_ino = 0; + c_entry = NULL; } -void -ncp_invalid_dir_cache(struct inode *ino) +void ncp_invalid_dir_cache(struct inode *ino) { - if ((ino->i_dev == c_dev) && (ino->i_ino == c_ino)) - { + if ((ino->i_dev == c_dev) && (ino->i_ino == c_ino)) { c_dev = 0; - c_ino = 0; - c_seen_eof = 0; - } + c_ino = 0; + c_seen_eof = 0; + } } -void -ncp_free_dir_cache(void) +void ncp_free_dir_cache(void) { - DPRINTK("ncp_free_dir_cache: enter\n"); - - if (c_entry == NULL) - { - return; - } + DPRINTK("ncp_free_dir_cache: enter\n"); + if (c_entry == NULL) { + return; + } vfree(c_entry); c_entry = NULL; - DPRINTK("ncp_free_dir_cache: exit\n"); + DPRINTK("ncp_free_dir_cache: exit\n"); } static struct inode * -ncp_iget(struct inode *dir, struct nw_file_info *finfo) + ncp_iget(struct inode *dir, struct nw_file_info *finfo) { struct inode *inode; - struct ncp_inode_info *new_inode_info; - struct ncp_inode_info *root; + struct ncp_inode_info *new_inode_info; + struct ncp_inode_info *root; - if (dir == NULL) - { + if (dir == NULL) { printk("ncp_iget: dir is NULL\n"); return NULL; } - - if (finfo == NULL) - { + if (finfo == NULL) { printk("ncp_iget: finfo is NULL\n"); return NULL; } + new_inode_info = ncp_kmalloc(sizeof(struct ncp_inode_info), + GFP_KERNEL); - new_inode_info = ncp_kmalloc(sizeof(struct ncp_inode_info), - GFP_KERNEL); - - if (new_inode_info == NULL) - { - printk("ncp_iget: could not alloc mem for %s\n", + if (new_inode_info == NULL) { + printk("ncp_iget: could not alloc mem for %s\n", finfo->i.entryName); - return NULL; - } + return NULL; + } + new_inode_info->state = NCP_INODE_LOOKED_UP; + new_inode_info->nused = 0; + new_inode_info->dir = 
NCP_INOP(dir); + new_inode_info->finfo = *finfo; - new_inode_info->state = NCP_INODE_LOOKED_UP; - new_inode_info->nused = 0; - new_inode_info->dir = NCP_INOP(dir); - new_inode_info->finfo = *finfo; + NCP_INOP(dir)->nused += 1; - NCP_INOP(dir)->nused += 1; + /* We have to link the new inode_info into the doubly linked + list of inode_infos to make a complete linear search + possible. */ - /* We have to link the new inode_info into the doubly linked - list of inode_infos to make a complete linear search - possible. */ + root = &(NCP_SERVER(dir)->root); - root = &(NCP_SERVER(dir)->root); + new_inode_info->prev = root; + new_inode_info->next = root->next; + root->next->prev = new_inode_info; + root->next = new_inode_info; - new_inode_info->prev = root; - new_inode_info->next = root->next; - root->next->prev = new_inode_info; - root->next = new_inode_info; - if (!(inode = iget(dir->i_sb, ncp_info_ino(NCP_SERVER(dir), - new_inode_info)))) - { + new_inode_info)))) { printk("ncp_iget: iget failed!"); return NULL; } - return inode; } -void -ncp_free_inode_info(struct ncp_inode_info *i) +void ncp_free_inode_info(struct ncp_inode_info *i) { - if (i == NULL) - { - printk("ncp_free_inode: i == NULL\n"); - return; - } - - i->state = NCP_INODE_CACHED; - while ((i->nused == 0) && (i->state == NCP_INODE_CACHED)) - { - struct ncp_inode_info *dir = i->dir; + if (i == NULL) { + printk("ncp_free_inode: i == NULL\n"); + return; + } + i->state = NCP_INODE_CACHED; + while ((i->nused == 0) && (i->state == NCP_INODE_CACHED)) { + struct ncp_inode_info *dir = i->dir; - i->next->prev = i->prev; - i->prev->next = i->next; + i->next->prev = i->prev; + i->prev->next = i->next; DDPRINTK("ncp_free_inode_info: freeing %s\n", i->finfo.i.entryName); - ncp_kfree_s(i, sizeof(struct ncp_inode_info)); + ncp_kfree_s(i, sizeof(struct ncp_inode_info)); - if (dir == i) return; + if (dir == i) + return; - (dir->nused)--; - i = dir; - } + (dir->nused)--; + i = dir; + } } - -void -ncp_init_root(struct ncp_server 
*server) + +void ncp_init_root(struct ncp_server *server) { - struct ncp_inode_info *root = &(server->root); + struct ncp_inode_info *root = &(server->root); struct nw_info_struct *i = &(root->finfo.i); unsigned short dummy; - DPRINTK("ncp_init_root: server %s\n", server->m.server_name); - DPRINTK("ncp_init_root: i = %x\n", (int)i); + DPRINTK("ncp_init_root: i = %x\n", (int) i); - root->finfo.opened = 0; - i->attributes = aDIR; + root->finfo.opened = 0; + i->attributes = aDIR; i->dataStreamSize = 1024; i->dirEntNum = i->DosDirNum = 0; - i->volNumber = NCP_NUMBER_OF_VOLUMES+1; /* illegal volnum */ + i->volNumber = NCP_NUMBER_OF_VOLUMES + 1; /* illegal volnum */ ncp_date_unix2dos(0, &(i->creationTime), &(i->creationDate)); ncp_date_unix2dos(0, &(i->modifyTime), &(i->modifyDate)); ncp_date_unix2dos(0, &dummy, &(i->lastAccessDate)); + i->creationTime = le16_to_cpu(i->creationTime); + i->creationDate = le16_to_cpu(i->creationDate); + i->modifyTime = le16_to_cpu(i->modifyTime); + i->modifyDate = le16_to_cpu(i->modifyDate); + i->lastAccessDate = le16_to_cpu(i->lastAccessDate); i->nameLen = 0; i->entryName[0] = '\0'; - root->state = NCP_INODE_LOOKED_UP; - root->nused = 1; - root->dir = root; - root->next = root->prev = root; - return; + root->state = NCP_INODE_LOOKED_UP; + root->nused = 1; + root->dir = root; + root->next = root->prev = root; + return; } -int -ncp_conn_logged_in(struct ncp_server *server) +int ncp_conn_logged_in(struct ncp_server *server) { - if (server->m.mounted_vol[0] == '\0') - { + if (server->m.mounted_vol[0] == '\0') { return 0; } - str_upper(server->m.mounted_vol); if (ncp_lookup_volume(server, server->m.mounted_vol, - &(server->root.finfo.i)) != 0) - { + &(server->root.finfo.i)) != 0) { return -ENOENT; } str_lower(server->root.finfo.i.entryName); @@ -701,202 +608,166 @@ ncp_conn_logged_in(struct ncp_server *server) return 0; } -void -ncp_free_all_inodes(struct ncp_server *server) +void ncp_free_all_inodes(struct ncp_server *server) { - /* Here 
nothing should be to do. I do not know whether it's - better to leave some memory allocated or be stuck in an - endless loop */ + /* Here nothing should be to do. I do not know whether it's + better to leave some memory allocated or be stuck in an + endless loop */ #if 1 - struct ncp_inode_info *root = &(server->root); - - if (root->next != root) - { - printk("ncp_free_all_inodes: INODES LEFT!!!\n"); - } - - while (root->next != root) - { - printk("ncp_free_all_inodes: freeing inode\n"); - ncp_free_inode_info(root->next); - /* In case we have an endless loop.. */ - schedule(); - } -#endif - - return; + struct ncp_inode_info *root = &(server->root); + + if (root->next != root) { + printk("ncp_free_all_inodes: INODES LEFT!!!\n"); + } + while (root->next != root) { + printk("ncp_free_all_inodes: freeing inode\n"); + ncp_free_inode_info(root->next); + /* In case we have an endless loop.. */ + schedule(); + } +#endif + + return; } /* We will search the inode that belongs to this name, currently by a complete linear search through the inodes belonging to this filesystem. This has to be fixed. */ static struct ncp_inode_info * -ncp_find_dir_inode(struct inode *dir, const char *name) + ncp_find_dir_inode(struct inode *dir, const char *name) { struct ncp_server *server = NCP_SERVER(dir); struct nw_info_struct *dir_info = NCP_ISTRUCT(dir); - struct ncp_inode_info *result = &(server->root); + struct ncp_inode_info *result = &(server->root); - if (name == NULL) - { - return NULL; + if (name == NULL) { + return NULL; } - - do - { - if ( (result->dir->finfo.i.dirEntNum == dir_info->dirEntNum) - && (result->dir->finfo.i.volNumber == dir_info->volNumber) + do { + if ((result->dir->finfo.i.dirEntNum == dir_info->dirEntNum) + && (result->dir->finfo.i.volNumber == dir_info->volNumber) && (strcmp(result->finfo.i.entryName, name) == 0) - /* The root dir is never looked up using this - * routine. 
Without the following test a root - * directory 'sys' in a volume named 'sys' could - * never be looked up, because - * server->root->dir==server->root. */ - && (result != &(server->root))) - { - return result; + /* The root dir is never looked up using this + * routine. Without the following test a root + * directory 'sys' in a volume named 'sys' could + * never be looked up, because + * server->root->dir==server->root. */ + && (result != &(server->root))) { + return result; } - result = result->next; + result = result->next; - } + } while (result != &(server->root)); - return NULL; + return NULL; } -static int -ncp_lookup(struct inode *dir, const char *__name, int len, - struct inode **result) +static int ncp_lookup(struct inode *dir, const char *__name, int len, + struct inode **result) { struct nw_file_info finfo; struct ncp_server *server; struct ncp_inode_info *result_info; int found_in_cache; int down_case = 0; - char name[len+1]; + char name[len + 1]; *result = NULL; - if (!dir || !S_ISDIR(dir->i_mode)) - { + if (!dir || !S_ISDIR(dir->i_mode)) { printk("ncp_lookup: inode is NULL or not a directory.\n"); iput(dir); return -ENOENT; } - server = NCP_SERVER(dir); - if (!ncp_conn_valid(server)) - { + if (!ncp_conn_valid(server)) { iput(dir); return -EIO; } - DPRINTK("ncp_lookup: %s, len %d\n", __name, len); /* Fast cheat for . */ - if (len == 0 || (len == 1 && __name[0] == '.')) - { + if (len == 0 || (len == 1 && __name[0] == '.')) { *result = dir; return 0; } - /* ..and for .. */ - if (len == 2 && __name[0] == '.' && __name[1] == '.') - { + if (len == 2 && __name[0] == '.' 
&& __name[1] == '.') { struct ncp_inode_info *parent = NCP_INOP(dir)->dir; - if (parent->state == NCP_INODE_CACHED) - { + if (parent->state == NCP_INODE_CACHED) { parent->state = NCP_INODE_LOOKED_UP; } - *result = iget(dir->i_sb, ncp_info_ino(server, parent)); iput(dir); - if (*result == 0) - { + if (*result == 0) { return -EACCES; - } - else - { + } else { return 0; } } - memcpy(name, __name, len); name[len] = 0; lock_super(dir->i_sb); result_info = ncp_find_dir_inode(dir, name); - if (result_info != 0) - { - if (result_info->state == NCP_INODE_CACHED) - { - result_info->state = NCP_INODE_LOOKED_UP; + if (result_info != 0) { + if (result_info->state == NCP_INODE_CACHED) { + result_info->state = NCP_INODE_LOOKED_UP; } + /* Here we convert the inode_info address into an + inode number */ - /* Here we convert the inode_info address into an - inode number */ - - *result = iget(dir->i_sb, ncp_info_ino(server, result_info)); + *result = iget(dir->i_sb, ncp_info_ino(server, result_info)); unlock_super(dir->i_sb); - iput(dir); - - if (*result == NULL) - { - return -EACCES; - } + iput(dir); + if (*result == NULL) { + return -EACCES; + } return 0; - } - - /* If the file is in the dir cache, we do not have to ask the - server. */ + } + /* If the file is in the dir cache, we do not have to ask the + server. 
*/ - found_in_cache = 0; + found_in_cache = 0; ncp_lock_dircache(); - if ((dir->i_dev == c_dev) && (dir->i_ino == c_ino)) - { - int first = c_last_returned_index; - int i; + if ((dir->i_dev == c_dev) && (dir->i_ino == c_ino)) { + int first = c_last_returned_index; + int i; - i = first; - do - { - DDPRINTK("ncp_lookup: trying index: %d, name: %s\n", + i = first; + do { + DDPRINTK("ncp_lookup: trying index: %d, name: %s\n", i, c_entry[i].i.entryName); - if (strcmp(c_entry[i].i.entryName, name) == 0) - { - DPRINTK("ncp_lookup: found in cache!\n"); + if (strcmp(c_entry[i].i.entryName, name) == 0) { + DPRINTK("ncp_lookup: found in cache!\n"); finfo.i = c_entry[i].i; found_in_cache = 1; break; - } - i = (i + 1) % c_size; - } + } + i = (i + 1) % c_size; + } while (i != first); - } + } ncp_unlock_dircache(); - if (found_in_cache == 0) - { + if (found_in_cache == 0) { int res; DDPRINTK("ncp_lookup: do_lookup on %s/%s\n", NCP_ISTRUCT(dir)->entryName, name); - if (ncp_is_server_root(dir)) - { + if (ncp_is_server_root(dir)) { str_upper(name); down_case = 1; res = ncp_lookup_volume(server, name, &(finfo.i)); - } - else - { - if (!ncp_preserve_case(dir)) - { + } else { + if (!ncp_preserve_case(dir)) { str_upper(name); down_case = 1; } @@ -905,313 +776,246 @@ ncp_lookup(struct inode *dir, const char *__name, int len, NCP_ISTRUCT(dir)->dirEntNum, name, &(finfo.i)); } - if (res != 0) - { + if (res != 0) { unlock_super(dir->i_sb); - iput(dir); - return -ENOENT; - } - } - + iput(dir); + return -ENOENT; + } + } finfo.opened = 0; - if (down_case != 0) - { + if (down_case != 0) { str_lower(finfo.i.entryName); } - - if (!(*result = ncp_iget(dir, &finfo))) - { + if (!(*result = ncp_iget(dir, &finfo))) { unlock_super(dir->i_sb); iput(dir); return -EACCES; } - unlock_super(dir->i_sb); iput(dir); return 0; } -static int -ncp_create(struct inode *dir, const char *name, int len, int mode, - struct inode **result) +static int ncp_create(struct inode *dir, const char *name, int len, int mode, + 
struct inode **result) { struct nw_file_info finfo; - __u8 _name[len+1]; + __u8 _name[len + 1]; *result = NULL; - if (!dir || !S_ISDIR(dir->i_mode)) - { + if (!dir || !S_ISDIR(dir->i_mode)) { printk("ncp_create: inode is NULL or not a directory\n"); iput(dir); return -ENOENT; } - if (!ncp_conn_valid(NCP_SERVER(dir))) - { + if (!ncp_conn_valid(NCP_SERVER(dir))) { iput(dir); return -EIO; } - strncpy(_name, name, len); _name[len] = '\0'; - if (!ncp_preserve_case(dir)) - { + if (!ncp_preserve_case(dir)) { str_upper(_name); } - lock_super(dir->i_sb); if (ncp_open_create_file_or_subdir(NCP_SERVER(dir), NCP_ISTRUCT(dir), _name, - OC_MODE_CREATE|OC_MODE_OPEN| + OC_MODE_CREATE | OC_MODE_OPEN | OC_MODE_REPLACE, - 0, AR_READ|AR_WRITE, - &finfo) != 0) - { + 0, AR_READ | AR_WRITE, + &finfo) != 0) { unlock_super(dir->i_sb); iput(dir); return -EACCES; } - ncp_invalid_dir_cache(dir); - if (!ncp_preserve_case(dir)) - { + if (!ncp_preserve_case(dir)) { str_lower(finfo.i.entryName); } - finfo.access = O_RDWR; - if (!(*result = ncp_iget(dir, &finfo)) < 0) - { + if (!(*result = ncp_iget(dir, &finfo)) < 0) { ncp_close_file(NCP_SERVER(dir), finfo.file_handle); unlock_super(dir->i_sb); iput(dir); return -EINVAL; } - unlock_super(dir->i_sb); iput(dir); - return 0; + return 0; } -static int -ncp_mkdir(struct inode *dir, const char *name, int len, int mode) +static int ncp_mkdir(struct inode *dir, const char *name, int len, int mode) { int error; struct nw_file_info new_dir; - __u8 _name[len+1]; + __u8 _name[len + 1]; - if ( (name[0] == '.') - && ( (len == 1) - || ( (len == 2) - && (name[1] == '.')))) - { + if ((name[0] == '.') + && ((len == 1) + || ((len == 2) + && (name[1] == '.')))) { iput(dir); return -EEXIST; } - strncpy(_name, name, len); _name[len] = '\0'; - if (!ncp_preserve_case(dir)) - { + if (!ncp_preserve_case(dir)) { str_upper(_name); } - - if (!dir || !S_ISDIR(dir->i_mode)) - { + if (!dir || !S_ISDIR(dir->i_mode)) { printk("ncp_mkdir: inode is NULL or not a directory\n"); 
iput(dir); return -ENOENT; } - if (!ncp_conn_valid(NCP_SERVER(dir))) - { + if (!ncp_conn_valid(NCP_SERVER(dir))) { iput(dir); return -EIO; } - if (ncp_open_create_file_or_subdir(NCP_SERVER(dir), NCP_ISTRUCT(dir), _name, OC_MODE_CREATE, aDIR, 0xffff, - &new_dir) != 0) - { + &new_dir) != 0) { error = -EACCES; - } - else - { + } else { error = 0; - ncp_invalid_dir_cache(dir); - } + ncp_invalid_dir_cache(dir); + } iput(dir); return error; } -static int -ncp_rmdir(struct inode *dir, const char *name, int len) +static int ncp_rmdir(struct inode *dir, const char *name, int len) { int error; - __u8 _name[len+1]; + __u8 _name[len + 1]; - if (!dir || !S_ISDIR(dir->i_mode)) - { + if (!dir || !S_ISDIR(dir->i_mode)) { printk("ncp_rmdir: inode is NULL or not a directory\n"); iput(dir); return -ENOENT; } - if (!ncp_conn_valid(NCP_SERVER(dir))) - { + if (!ncp_conn_valid(NCP_SERVER(dir))) { iput(dir); return -EIO; } - if (ncp_find_dir_inode(dir, name) != NULL) - { + if (ncp_find_dir_inode(dir, name) != NULL) { iput(dir); - error = -EBUSY; - } - else - { + error = -EBUSY; + } else { strncpy(_name, name, len); _name[len] = '\0'; - if (!ncp_preserve_case(dir)) - { + if (!ncp_preserve_case(dir)) { str_upper(_name); } - - if ((error = ncp_del_file_or_subdir(NCP_SERVER(dir), + if ((error = ncp_del_file_or_subdir(NCP_SERVER(dir), NCP_ISTRUCT(dir), - _name)) == 0) - { - ncp_invalid_dir_cache(dir); - } - else - { + _name)) == 0) { + ncp_invalid_dir_cache(dir); + } else { error = -EACCES; } - } + } iput(dir); return error; } -static int -ncp_unlink(struct inode *dir, const char *name, int len) +static int ncp_unlink(struct inode *dir, const char *name, int len) { int error; - __u8 _name[len+1]; + __u8 _name[len + 1]; - if (!dir || !S_ISDIR(dir->i_mode)) - { + if (!dir || !S_ISDIR(dir->i_mode)) { printk("ncp_unlink: inode is NULL or not a directory\n"); iput(dir); return -ENOENT; } - if (!ncp_conn_valid(NCP_SERVER(dir))) - { + if (!ncp_conn_valid(NCP_SERVER(dir))) { iput(dir); return -EIO; } 
- if (ncp_find_dir_inode(dir, name) != NULL) - { + if (ncp_find_dir_inode(dir, name) != NULL) { iput(dir); - error = -EBUSY; - } - else - { + error = -EBUSY; + } else { strncpy(_name, name, len); _name[len] = '\0'; - if (!ncp_preserve_case(dir)) - { + if (!ncp_preserve_case(dir)) { str_upper(_name); } - - if ((error = ncp_del_file_or_subdir(NCP_SERVER(dir), + if ((error = ncp_del_file_or_subdir(NCP_SERVER(dir), NCP_ISTRUCT(dir), - _name)) == 0) - { - ncp_invalid_dir_cache(dir); - } - else - { + _name)) == 0) { + ncp_invalid_dir_cache(dir); + } else { error = -EACCES; } - } + } iput(dir); return error; } -static int -ncp_rename(struct inode *old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir) +static int ncp_rename(struct inode *old_dir, const char *old_name, int old_len, + struct inode *new_dir, const char *new_name, int new_len, + int must_be_dir) { int res; - char _old_name[old_len+1]; - char _new_name[new_len+1]; + char _old_name[old_len + 1]; + char _new_name[new_len + 1]; - if (!old_dir || !S_ISDIR(old_dir->i_mode)) - { + if (!old_dir || !S_ISDIR(old_dir->i_mode)) { printk("ncp_rename: old inode is NULL or not a directory\n"); - res = -ENOENT; - goto finished; + res = -ENOENT; + goto finished; } - - if (!ncp_conn_valid(NCP_SERVER(old_dir))) - { + if (!ncp_conn_valid(NCP_SERVER(old_dir))) { res = -EIO; goto finished; } - - if (!new_dir || !S_ISDIR(new_dir->i_mode)) - { + if (!new_dir || !S_ISDIR(new_dir->i_mode)) { printk("ncp_rename: new inode is NULL or not a directory\n"); - res = -ENOENT; - goto finished; + res = -ENOENT; + goto finished; + } + if ((ncp_find_dir_inode(old_dir, old_name) != NULL) + || (ncp_find_dir_inode(new_dir, new_name) != NULL)) { + res = -EBUSY; + goto finished; } - - if ( (ncp_find_dir_inode(old_dir, old_name) != NULL) - || (ncp_find_dir_inode(new_dir, new_name) != NULL)) - { - res = -EBUSY; - goto finished; - } - strncpy(_old_name, old_name, old_len); 
_old_name[old_len] = '\0'; - if (!ncp_preserve_case(old_dir)) - { + if (!ncp_preserve_case(old_dir)) { str_upper(_old_name); } - strncpy(_new_name, new_name, new_len); _new_name[new_len] = '\0'; - if (!ncp_preserve_case(new_dir)) - { + if (!ncp_preserve_case(new_dir)) { str_upper(_new_name); } - res = ncp_ren_or_mov_file_or_subdir(NCP_SERVER(old_dir), - NCP_ISTRUCT(old_dir), _old_name, - NCP_ISTRUCT(new_dir), _new_name); - - if (res == 0) - { - ncp_invalid_dir_cache(old_dir); - ncp_invalid_dir_cache(new_dir); - } - else - { + NCP_ISTRUCT(old_dir), _old_name, + NCP_ISTRUCT(new_dir), _new_name); + + if (res == 0) { + ncp_invalid_dir_cache(old_dir); + ncp_invalid_dir_cache(new_dir); + } else { res = -EACCES; } - - finished: - iput(old_dir); + + finished: + iput(old_dir); iput(new_dir); return res; } @@ -1220,64 +1024,62 @@ ncp_rename(struct inode *old_dir, const char *old_name, int old_len, /* Linear day numbers of the respective 1sts in non-leap years. */ -static int day_n[] = { 0,31,59,90,120,151,181,212,243,273,304,334,0,0,0,0 }; +static int day_n[] = +{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0}; /* JanFebMarApr May Jun Jul Aug Sep Oct Nov Dec */ extern struct timezone sys_tz; -static int -utc2local(int time) +static int utc2local(int time) { - return time - sys_tz.tz_minuteswest*60 + + return time - sys_tz.tz_minuteswest * 60 + (sys_tz.tz_dsttime ? 3600 : 0); } -static int -local2utc(int time) +static int local2utc(int time) { - return time + sys_tz.tz_minuteswest*60 - + return time + sys_tz.tz_minuteswest * 60 - (sys_tz.tz_dsttime ? 3600 : 0); } /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). 
*/ -int -ncp_date_dos2unix(unsigned short time,unsigned short date) +int ncp_date_dos2unix(unsigned short time, unsigned short date) { - int month,year,secs; + int month, year, secs; - month = ((date >> 5) & 15)-1; + month = ((date >> 5) & 15) - 1; year = date >> 9; - secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* - ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && - month < 2 ? 1 : 0)+3653); - /* days since 1.1.70 plus 80's leap day */ + secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 + 86400 * + ((date & 31) - 1 + day_n[month] + (year / 4) + year * 365 - ((year & 3) == 0 && + month < 2 ? 1 : 0) + 3653); + /* days since 1.1.70 plus 80's leap day */ return local2utc(secs); } /* Convert linear UNIX date to a MS-DOS time/date pair. */ -void -ncp_date_unix2dos(int unix_date,unsigned short *time, unsigned short *date) +void ncp_date_unix2dos(int unix_date, unsigned short *time, unsigned short *date) { - int day,year,nl_day,month; + int day, year, nl_day, month; unix_date = utc2local(unix_date); - *time = (unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ - (((unix_date/3600) % 24) << 11); - day = unix_date/86400-3652; - year = day/365; - if ((year+3)/4+365*year > day) year--; - day -= (year+3)/4+365*year; + *time = (unix_date % 60) / 2 + (((unix_date / 60) % 60) << 5) + + (((unix_date / 3600) % 24) << 11); + day = unix_date / 86400 - 3652; + year = day / 365; + if ((year + 3) / 4 + 365 * year > day) + year--; + day -= (year + 3) / 4 + 365 * year; if (day == 59 && !(year & 3)) { nl_day = day; month = 2; - } - else { - nl_day = (year & 3) || day <= 59 ? day : day-1; + } else { + nl_day = (year & 3) || day <= 59 ? 
day : day - 1; for (month = 0; month < 12; month++) - if (day_n[month] > nl_day) break; + if (day_n[month] > nl_day) + break; } - *date = nl_day-day_n[month-1]+1+(month << 5)+(year << 9); + *date = nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9); } diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 12b646f91..9bdc793cc 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -21,215 +21,172 @@ static inline int min(int a, int b) { - return a<b ? a : b; + return a < b ? a : b; } -static int -ncp_fsync(struct inode *inode, struct file *file) +static int ncp_fsync(struct inode *inode, struct file *file) { return 0; } -int -ncp_make_open(struct inode *i, int right) +int ncp_make_open(struct inode *i, int right) { - struct nw_file_info *finfo; + struct nw_file_info *finfo; - if (i == NULL) - { - printk("ncp_make_open: got NULL inode\n"); - return -EINVAL; - } - - finfo = NCP_FINFO(i); + if (i == NULL) { + printk("ncp_make_open: got NULL inode\n"); + return -EINVAL; + } + finfo = NCP_FINFO(i); - DPRINTK("ncp_make_open: dirent->opened = %d\n", finfo->opened); + DPRINTK("ncp_make_open: dirent->opened = %d\n", finfo->opened); lock_super(i->i_sb); - if (finfo->opened == 0) - { + if (finfo->opened == 0) { finfo->access = -1; - /* tries max. rights */ + /* tries max. 
rights */ if (ncp_open_create_file_or_subdir(NCP_SERVER(i), NULL, NULL, OC_MODE_OPEN, 0, AR_READ | AR_WRITE, - finfo) == 0) - { + finfo) == 0) { finfo->access = O_RDWR; - } - else if (ncp_open_create_file_or_subdir(NCP_SERVER(i), - NULL, NULL, - OC_MODE_OPEN, 0, - AR_READ, - finfo) == 0) - { + } else if (ncp_open_create_file_or_subdir(NCP_SERVER(i), + NULL, NULL, + OC_MODE_OPEN, 0, + AR_READ, + finfo) == 0) { finfo->access = O_RDONLY; } - } - + } unlock_super(i->i_sb); - if ( ((right == O_RDONLY) && ( (finfo->access == O_RDONLY) - || (finfo->access == O_RDWR))) - || ((right == O_WRONLY) && ( (finfo->access == O_WRONLY) - || (finfo->access == O_RDWR))) - || ((right == O_RDWR) && (finfo->access == O_RDWR))) - return 0; + if (((right == O_RDONLY) && ((finfo->access == O_RDONLY) + || (finfo->access == O_RDWR))) + || ((right == O_WRONLY) && ((finfo->access == O_WRONLY) + || (finfo->access == O_RDWR))) + || ((right == O_RDWR) && (finfo->access == O_RDWR))) + return 0; - return -EACCES; + return -EACCES; } -static long -ncp_file_read(struct inode *inode, struct file *file, char *buf, unsigned long count) +static long ncp_file_read(struct inode *inode, struct file *file, char *buf, unsigned long count) { int bufsize, already_read; off_t pos; - int errno; + int errno; - DPRINTK("ncp_file_read: enter %s\n", NCP_ISTRUCT(inode)->entryName); - - if (inode == NULL) - { + DPRINTK("ncp_file_read: enter %s\n", NCP_ISTRUCT(inode)->entryName); + + if (inode == NULL) { DPRINTK("ncp_file_read: inode = NULL\n"); return -EINVAL; } - if (!ncp_conn_valid(NCP_SERVER(inode))) - { + if (!ncp_conn_valid(NCP_SERVER(inode))) { return -EIO; } - - if (!S_ISREG(inode->i_mode)) - { + if (!S_ISREG(inode->i_mode)) { DPRINTK("ncp_file_read: read from non-file, mode %07o\n", - inode->i_mode); + inode->i_mode); return -EINVAL; } - pos = file->f_pos; - if (pos + count > inode->i_size) - { + if (pos + count > inode->i_size) { count = inode->i_size - pos; } - - if (count <= 0) - { + if (count <= 0) { return 
0; } - - if ((errno = ncp_make_open(inode, O_RDONLY)) != 0) - { - return errno; + if ((errno = ncp_make_open(inode, O_RDONLY)) != 0) { + return errno; } - bufsize = NCP_SERVER(inode)->buffer_size; - already_read = 0; + already_read = 0; /* First read in as much as possible for each bufsize. */ - while (already_read < count) - { + while (already_read < count) { int read_this_time; int to_read = min(bufsize - (pos % bufsize), count - already_read); if (ncp_read(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, - pos, to_read, buf, &read_this_time) != 0) - { - return -EIO; /* This is not exact, i know.. */ + pos, to_read, buf, &read_this_time) != 0) { + return -EIO; /* This is not exact, i know.. */ } - pos += read_this_time; buf += read_this_time; - already_read += read_this_time; + already_read += read_this_time; - if (read_this_time < to_read) - { - break; + if (read_this_time < to_read) { + break; } } - file->f_pos = pos; + file->f_pos = pos; - if (!IS_RDONLY(inode)) - { + if (!IS_RDONLY(inode)) { inode->i_atime = CURRENT_TIME; } - inode->i_dirt = 1; - DPRINTK("ncp_file_read: exit %s\n", NCP_ISTRUCT(inode)->entryName); + DPRINTK("ncp_file_read: exit %s\n", NCP_ISTRUCT(inode)->entryName); - return already_read; + return already_read; } -static long -ncp_file_write(struct inode *inode, struct file *file, const char *buf, - unsigned long count) +static long ncp_file_write(struct inode *inode, struct file *file, const char *buf, + unsigned long count) { int bufsize, already_written; - off_t pos; - int errno; - - if (inode == NULL) - { + off_t pos; + int errno; + + if (inode == NULL) { DPRINTK("ncp_file_write: inode = NULL\n"); return -EINVAL; } - if (!ncp_conn_valid(NCP_SERVER(inode))) - { + if (!ncp_conn_valid(NCP_SERVER(inode))) { return -EIO; } - - if (!S_ISREG(inode->i_mode)) - { + if (!S_ISREG(inode->i_mode)) { DPRINTK("ncp_file_write: write to non-file, mode %07o\n", - inode->i_mode); + inode->i_mode); return -EINVAL; } + DPRINTK("ncp_file_write: enter %s\n", 
NCP_ISTRUCT(inode)->entryName); - DPRINTK("ncp_file_write: enter %s\n", NCP_ISTRUCT(inode)->entryName); - - if (count <= 0) - { + if (count <= 0) { return 0; } - - if ((errno = ncp_make_open(inode, O_RDWR)) != 0) - { - return errno; + if ((errno = ncp_make_open(inode, O_RDWR)) != 0) { + return errno; } - pos = file->f_pos; - if (file->f_flags & O_APPEND) - { + if (file->f_flags & O_APPEND) { pos = inode->i_size; } - bufsize = NCP_SERVER(inode)->buffer_size; - already_written = 0; + already_written = 0; - while (already_written < count) - { + while (already_written < count) { int written_this_time; int to_write = min(bufsize - (pos % bufsize), count - already_written); if (ncp_write(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, - pos, to_write, buf, &written_this_time) != 0) - { + pos, to_write, buf, &written_this_time) != 0) { return -EIO; - } - + } pos += written_this_time; buf += written_this_time; already_written += written_this_time; - if (written_this_time < to_write) - { + if (written_this_time < to_write) { break; } } @@ -239,31 +196,31 @@ ncp_file_write(struct inode *inode, struct file *file, const char *buf, file->f_pos = pos; - if (pos > inode->i_size) - { - inode->i_size = pos; + if (pos > inode->i_size) { + inode->i_size = pos; ncp_invalid_dir_cache(NCP_INOP(inode)->dir->inode); - } - - DPRINTK("ncp_file_write: exit %s\n", NCP_ISTRUCT(inode)->entryName); + } + DPRINTK("ncp_file_write: exit %s\n", NCP_ISTRUCT(inode)->entryName); return already_written; } -static struct file_operations ncp_file_operations = { +static struct file_operations ncp_file_operations = +{ NULL, /* lseek - default */ ncp_file_read, /* read */ ncp_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ ncp_ioctl, /* ioctl */ ncp_mmap, /* mmap */ - NULL, /* open */ - NULL, /* release */ + NULL, /* open */ + NULL, /* release */ ncp_fsync, /* fsync */ }; -struct inode_operations ncp_file_inode_operations = { +struct 
inode_operations ncp_file_inode_operations = +{ &ncp_file_operations, /* default file operations */ NULL, /* create */ NULL, /* lookup */ diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 55a0c4aae..1db0dcc78 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -2,14 +2,16 @@ * inode.c * * Copyright (C) 1995, 1996 by Volker Lendecke + * Modified for big endian by J.F. Chadima and David S. Miller * */ -#include <linux/module.h> #include <linux/config.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/byteorder.h> #include <linux/sched.h> #include <linux/ncp_fs.h> @@ -21,6 +23,7 @@ #include <linux/locks.h> #include <linux/fcntl.h> #include <linux/malloc.h> +#include <linux/init.h> #ifdef CONFIG_KERNELD #include <linux/kerneld.h> #endif @@ -34,8 +37,9 @@ static void ncp_put_super(struct super_block *); static void ncp_statfs(struct super_block *sb, struct statfs *buf, int bufsiz); static int ncp_notify_change(struct inode *inode, struct iattr *attr); -static struct super_operations ncp_sops = { - ncp_read_inode, /* read inode */ +static struct super_operations ncp_sops = +{ + ncp_read_inode, /* read inode */ ncp_notify_change, /* notify change */ NULL, /* write inode */ ncp_put_inode, /* put inode */ @@ -49,363 +53,249 @@ static struct super_operations ncp_sops = { ncp_inode_info's and initializes the inode from the data found there. It does not allocate or deallocate anything. */ -static void -ncp_read_inode(struct inode *inode) +static void ncp_read_inode(struct inode *inode) { - /* Our task should be extremely simple here. We only have to - look up the information somebody else (ncp_iget) put into - the inode tree. The address of this information is the - inode->i_ino. Just to make sure everything went well, we - check it's there. */ + /* Our task should be extremely simple here. We only have to + look up the information somebody else (ncp_iget) put into + the inode tree. 
The address of this information is the + inode->i_ino. Just to make sure everything went well, we + check it's there. */ - struct ncp_inode_info *inode_info = ncp_find_inode(inode); + struct ncp_inode_info *inode_info = ncp_find_inode(inode); - if (inode_info == NULL) - { + if (inode_info == NULL) { /* Ok, now we're in trouble. The inode info is not there. What should we do now??? */ printk("ncp_read_inode: inode info not found\n"); return; } + inode_info->state = NCP_INODE_VALID; - inode_info->state = NCP_INODE_VALID; - - NCP_INOP(inode) = inode_info; + NCP_INOP(inode) = inode_info; inode_info->inode = inode; - if (NCP_ISTRUCT(inode)->attributes & aDIR) - { - inode->i_mode = NCP_SERVER(inode)->m.dir_mode; + if (NCP_ISTRUCT(inode)->attributes & aDIR) { + inode->i_mode = NCP_SERVER(inode)->m.dir_mode; /* for directories dataStreamSize seems to be some Object ID ??? */ inode->i_size = 512; - } - else - { - inode->i_mode = NCP_SERVER(inode)->m.file_mode; - inode->i_size = NCP_ISTRUCT(inode)->dataStreamSize; + } else { + inode->i_mode = NCP_SERVER(inode)->m.file_mode; + inode->i_size = le32_to_cpu(NCP_ISTRUCT(inode)->dataStreamSize); } - DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode); + DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode); - inode->i_nlink = 1; - inode->i_uid = NCP_SERVER(inode)->m.uid; - inode->i_gid = NCP_SERVER(inode)->m.gid; - inode->i_blksize = 512; - inode->i_rdev = 0; + inode->i_nlink = 1; + inode->i_uid = NCP_SERVER(inode)->m.uid; + inode->i_gid = NCP_SERVER(inode)->m.gid; + inode->i_blksize = 512; + inode->i_rdev = 0; - if ((inode->i_blksize != 0) && (inode->i_size != 0)) - { - inode->i_blocks = - (inode->i_size - 1) / inode->i_blksize + 1; - } - else - { - inode->i_blocks = 0; + if ((inode->i_blksize != 0) && (inode->i_size != 0)) { + inode->i_blocks = + (inode->i_size - 1) / inode->i_blksize + 1; + } else { + inode->i_blocks = 0; } - inode->i_mtime = ncp_date_dos2unix(NCP_ISTRUCT(inode)->modifyTime, - 
NCP_ISTRUCT(inode)->modifyDate); - inode->i_ctime = ncp_date_dos2unix(NCP_ISTRUCT(inode)->creationTime, - NCP_ISTRUCT(inode)->creationDate); + inode->i_mtime = ncp_date_dos2unix(le16_to_cpu(NCP_ISTRUCT(inode)->modifyTime), + le16_to_cpu(NCP_ISTRUCT(inode)->modifyDate)); + inode->i_ctime = ncp_date_dos2unix(le16_to_cpu(NCP_ISTRUCT(inode)->creationTime), + le16_to_cpu(NCP_ISTRUCT(inode)->creationDate)); inode->i_atime = ncp_date_dos2unix(0, - NCP_ISTRUCT(inode)->lastAccessDate); - - if (S_ISREG(inode->i_mode)) - { - inode->i_op = &ncp_file_inode_operations; - } - else if (S_ISDIR(inode->i_mode)) - { - inode->i_op = &ncp_dir_inode_operations; - } - else - { - inode->i_op = NULL; + le16_to_cpu(NCP_ISTRUCT(inode)->lastAccessDate)); + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ncp_file_inode_operations; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ncp_dir_inode_operations; + } else { + inode->i_op = NULL; } } -static void -ncp_put_inode(struct inode *inode) +static void ncp_put_inode(struct inode *inode) { - struct nw_file_info *finfo = NCP_FINFO(inode); + struct nw_file_info *finfo = NCP_FINFO(inode); struct super_block *sb = inode->i_sb; lock_super(sb); - if (finfo->opened != 0) - { - if (ncp_close_file(NCP_SERVER(inode), finfo->file_handle)!=0) - { - /* We can't do anything but complain. */ - printk("ncp_put_inode: could not close\n"); - } - } - + if (finfo->opened != 0) { + if (ncp_close_file(NCP_SERVER(inode), finfo->file_handle) != 0) { + /* We can't do anything but complain. 
*/ + printk("ncp_put_inode: could not close\n"); + } + } DDPRINTK("ncp_put_inode: put %s\n", - finfo->i.entryName); + finfo->i.entryName); - ncp_free_inode_info(NCP_INOP(inode)); + ncp_free_inode_info(NCP_INOP(inode)); - if (S_ISDIR(inode->i_mode)) - { - DDPRINTK("ncp_put_inode: put directory %ld\n", + if (S_ISDIR(inode->i_mode)) { + DDPRINTK("ncp_put_inode: put directory %ld\n", inode->i_ino); - ncp_invalid_dir_cache(inode); - } - + ncp_invalid_dir_cache(inode); + } clear_inode(inode); unlock_super(sb); } struct super_block * -ncp_read_super(struct super_block *sb, void *raw_data, int silent) + ncp_read_super(struct super_block *sb, void *raw_data, int silent) { struct ncp_mount_data *data = (struct ncp_mount_data *) raw_data; - struct ncp_server *server; + struct ncp_server *server; struct file *ncp_filp; - struct file *wdog_filp; - struct file *msg_filp; kdev_t dev = sb->s_dev; int error; - if (data == NULL) - { + if (data == NULL) { printk("ncp_read_super: missing data argument\n"); sb->s_dev = 0; return NULL; } - - if (data->version != NCP_MOUNT_VERSION) - { + if (data->version != NCP_MOUNT_VERSION) { printk("ncp warning: mount version %s than kernel\n", (data->version < NCP_MOUNT_VERSION) ? 
- "older" : "newer"); + "older" : "newer"); sb->s_dev = 0; return NULL; } - - if ( (data->ncp_fd >= NR_OPEN) + if ((data->ncp_fd >= NR_OPEN) || ((ncp_filp = current->files->fd[data->ncp_fd]) == NULL) - || (!S_ISSOCK(ncp_filp->f_inode->i_mode))) - { + || (!S_ISSOCK(ncp_filp->f_inode->i_mode))) { printk("ncp_read_super: invalid ncp socket\n"); sb->s_dev = 0; return NULL; } + /* We must malloc our own super-block info */ + server = (struct ncp_server *) ncp_kmalloc(sizeof(struct ncp_server), + GFP_KERNEL); - if ( (data->wdog_fd >= NR_OPEN) - || ((wdog_filp = current->files->fd[data->wdog_fd]) == NULL) - || (!S_ISSOCK(wdog_filp->f_inode->i_mode))) - { - printk("ncp_read_super: invalid wdog socket\n"); - sb->s_dev = 0; - return NULL; - } - - if ( (data->message_fd >= NR_OPEN) - || ((msg_filp = current->files->fd[data->message_fd]) == NULL) - || (!S_ISSOCK(msg_filp->f_inode->i_mode))) - { - printk("ncp_read_super: invalid wdog socket\n"); - sb->s_dev = 0; + if (server == NULL) { + printk("ncp_read_super: could not alloc ncp_server\n"); return NULL; } - - /* We must malloc our own super-block info */ - server = (struct ncp_server *)ncp_kmalloc(sizeof(struct ncp_server), - GFP_KERNEL); - - if (server == NULL) - { - printk("ncp_read_super: could not alloc ncp_server\n"); - return NULL; - } - ncp_filp->f_count += 1; - wdog_filp->f_count += 1; - msg_filp->f_count += 1; lock_super(sb); - NCP_SBP(sb) = server; - - sb->s_blocksize = 1024; /* Eh... Is this correct? */ + NCP_SBP(sb) = server; + + sb->s_blocksize = 1024; /* Eh... Is this correct? 
*/ sb->s_blocksize_bits = 10; sb->s_magic = NCP_SUPER_MAGIC; sb->s_dev = dev; sb->s_op = &ncp_sops; - server->ncp_filp = ncp_filp; - server->wdog_filp = wdog_filp; - server->msg_filp = msg_filp; - server->lock = 0; - server->wait = NULL; - server->packet = NULL; + server->ncp_filp = ncp_filp; + server->lock = 0; + server->wait = NULL; + server->packet = NULL; server->buffer_size = 0; server->conn_status = 0; - server->m = *data; + server->m = *data; + /* Althought anything producing this is buggy, it happens + now because of PATH_MAX changes.. */ + if (server->m.time_out < 10) { + server->m.time_out = 10; + printk("You need to recompile your ncpfs utils..\n"); + } server->m.file_mode = (server->m.file_mode & - (S_IRWXU|S_IRWXG|S_IRWXO)) | S_IFREG; - server->m.dir_mode = (server->m.dir_mode & - (S_IRWXU|S_IRWXG|S_IRWXO)) | S_IFDIR; - - /* protect against invalid mount points */ - server->m.mount_point[sizeof(server->m.mount_point)-1] = '\0'; + (S_IRWXU | S_IRWXG | S_IRWXO)) | S_IFREG; + server->m.dir_mode = (server->m.dir_mode & + (S_IRWXU | S_IRWXG | S_IRWXO)) | S_IFDIR; server->packet_size = NCP_PACKET_SIZE; - server->packet = ncp_kmalloc(NCP_PACKET_SIZE, GFP_KERNEL); + server->packet = ncp_kmalloc(NCP_PACKET_SIZE, GFP_KERNEL); - if (server->packet == NULL) - { + if (server->packet == NULL) { printk("ncpfs: could not alloc packet\n"); error = -ENOMEM; unlock_super(sb); goto fail; } - - /* - * Make the connection to the server - */ - - if (ncp_catch_watchdog(server) != 0) - { - printk("ncp_read_super: Could not catch watchdog\n"); - error = -EINVAL; - unlock_super(sb); - goto fail; - } - - if (ncp_catch_message(server) != 0) - { - printk("ncp_read_super: Could not catch messages\n"); - ncp_dont_catch_watchdog(server); - error = -EINVAL; - unlock_super(sb); - goto fail; - } - ncp_lock_server(server); error = ncp_connect(server); ncp_unlock_server(server); unlock_super(sb); - if (error < 0) - { + if (error < 0) { sb->s_dev = 0; printk("ncp_read_super: Failed 
connection, bailing out " - "(error = %d).\n", -error); - ncp_kfree_s(server->packet, server->packet_size); - ncp_dont_catch_watchdog(server); - goto fail; + "(error = %d).\n", -error); + ncp_kfree_s(server->packet, server->packet_size); + goto fail; } - - DPRINTK("ncp_read_super: NCP_SBP(sb) = %x\n", (int)NCP_SBP(sb)); + DPRINTK("ncp_read_super: NCP_SBP(sb) = %x\n", (int) NCP_SBP(sb)); ncp_init_root(server); - if (!(sb->s_mounted = iget(sb, ncp_info_ino(server, &(server->root))))) - { + if (!(sb->s_mounted = iget(sb, ncp_info_ino(server, + &(server->root))))) { sb->s_dev = 0; printk("ncp_read_super: get root inode failed\n"); - goto disconnect; + goto disconnect; } - if (ncp_negotiate_buffersize(server, NCP_DEFAULT_BUFSIZE, - &(server->buffer_size)) != 0) - { + &(server->buffer_size)) != 0) { sb->s_dev = 0; printk("ncp_read_super: could not get bufsize\n"); goto disconnect; } - DPRINTK("ncpfs: bufsize = %d\n", server->buffer_size); - MOD_INC_USE_COUNT; + MOD_INC_USE_COUNT; return sb; - disconnect: + disconnect: ncp_lock_server(server); ncp_disconnect(server); ncp_unlock_server(server); ncp_kfree_s(server->packet, server->packet_size); - ncp_dont_catch_watchdog(server); - fail: + fail: ncp_filp->f_count -= 1; - wdog_filp->f_count -= 1; - msg_filp->f_count -= 1; - ncp_kfree_s(NCP_SBP(sb), sizeof(struct ncp_server)); - return NULL; + ncp_kfree_s(NCP_SBP(sb), sizeof(struct ncp_server)); + return NULL; } -static void -ncp_put_super(struct super_block *sb) +static void ncp_put_super(struct super_block *sb) { - struct ncp_server *server = NCP_SBP(sb); + struct ncp_server *server = NCP_SBP(sb); lock_super(sb); ncp_lock_server(server); - ncp_disconnect(server); + ncp_disconnect(server); ncp_unlock_server(server); close_fp(server->ncp_filp); + kill_proc(server->m.wdog_pid, SIGTERM, 0); - ncp_dont_catch_watchdog(server); - close_fp(server->wdog_filp); - close_fp(server->msg_filp); + ncp_free_all_inodes(server); - ncp_free_all_inodes(server); - - ncp_kfree_s(server->packet, 
server->packet_size); + ncp_kfree_s(server->packet, server->packet_size); sb->s_dev = 0; - ncp_kfree_s(NCP_SBP(sb), sizeof(struct ncp_server)); + ncp_kfree_s(NCP_SBP(sb), sizeof(struct ncp_server)); NCP_SBP(sb) = NULL; unlock_super(sb); - MOD_DEC_USE_COUNT; + MOD_DEC_USE_COUNT; } -/* This routine is called from an interrupt in ncp_msg_data_ready. So - * we have to be careful NOT to sleep here! */ -void -ncp_trigger_message(struct ncp_server *server) -{ -#ifdef CONFIG_KERNELD - char command[ sizeof(server->m.mount_point) - + sizeof(NCP_MSG_COMMAND) + 2]; -#endif - - if (server == NULL) - { - printk("ncp_trigger_message: invalid server!\n"); - return; - } - - DPRINTK("ncp_trigger_message: on %s\n", - server->m.mount_point); - -#ifdef CONFIG_KERNELD - strcpy(command, NCP_MSG_COMMAND); - strcat(command, " "); - strcat(command, server->m.mount_point); - DPRINTK("ksystem: %s\n", command); - ksystem(command, KERNELD_NOWAIT); -#endif -} - -static void -ncp_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) +static void ncp_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) { struct statfs tmp; - + /* We cannot say how much disk space is left on a mounted - NetWare Server, because free space is distributed over - volumes, and the current user might have disk quotas. So - free space is not that simple to determine. Our decision - here is to err conservatively. */ + NetWare Server, because free space is distributed over + volumes, and the current user might have disk quotas. So + free space is not that simple to determine. Our decision + here is to err conservatively. 
*/ tmp.f_type = NCP_SUPER_MAGIC; tmp.f_bsize = 512; @@ -418,28 +308,25 @@ ncp_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) copy_to_user(buf, &tmp, bufsiz); } -static int -ncp_notify_change(struct inode *inode, struct iattr *attr) +static int ncp_notify_change(struct inode *inode, struct iattr *attr) { int result = 0; int info_mask; struct nw_modify_dos_info info; - if (!ncp_conn_valid(NCP_SERVER(inode))) - { + if (!ncp_conn_valid(NCP_SERVER(inode))) { return -EIO; } - if ((result = inode_change_ok(inode, attr)) < 0) return result; - if (((attr->ia_valid & ATTR_UID) && + if (((attr->ia_valid & ATTR_UID) && (attr->ia_uid != NCP_SERVER(inode)->m.uid))) return -EPERM; - if (((attr->ia_valid & ATTR_GID) && + if (((attr->ia_valid & ATTR_GID) && (attr->ia_uid != NCP_SERVER(inode)->m.gid))) - return -EPERM; + return -EPERM; if (((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & @@ -449,62 +336,54 @@ ncp_notify_change(struct inode *inode, struct iattr *attr) info_mask = 0; memset(&info, 0, sizeof(info)); - if ((attr->ia_valid & ATTR_CTIME) != 0) - { - info_mask |= (DM_CREATE_TIME|DM_CREATE_DATE); + if ((attr->ia_valid & ATTR_CTIME) != 0) { + info_mask |= (DM_CREATE_TIME | DM_CREATE_DATE); ncp_date_unix2dos(attr->ia_ctime, - &(info.creationTime), &(info.creationDate)); + &(info.creationTime), &(info.creationDate)); + info.creationTime = le16_to_cpu(info.creationTime); + info.creationDate = le16_to_cpu(info.creationDate); } - - if ((attr->ia_valid & ATTR_MTIME) != 0) - { - info_mask |= (DM_MODIFY_TIME|DM_MODIFY_DATE); + if ((attr->ia_valid & ATTR_MTIME) != 0) { + info_mask |= (DM_MODIFY_TIME | DM_MODIFY_DATE); ncp_date_unix2dos(attr->ia_mtime, &(info.modifyTime), &(info.modifyDate)); + info.modifyTime = le16_to_cpu(info.modifyTime); + info.modifyDate = le16_to_cpu(info.modifyDate); } - - if ((attr->ia_valid & ATTR_ATIME) != 0) - { + if ((attr->ia_valid & ATTR_ATIME) != 0) { __u16 dummy; info_mask |= (DM_LAST_ACCESS_DATE); ncp_date_unix2dos(attr->ia_ctime, 
&(dummy), &(info.lastAccessDate)); + info.lastAccessDate = le16_to_cpu(info.lastAccessDate); } - - if (info_mask != 0) - { + if (info_mask != 0) { if ((result = ncp_modify_file_or_subdir_dos_info(NCP_SERVER(inode), - NCP_ISTRUCT(inode), + NCP_ISTRUCT(inode), info_mask, - &info)) != 0) - { + &info)) != 0) { result = -EACCES; - if (info_mask == (DM_CREATE_TIME|DM_CREATE_DATE)) - { + if (info_mask == (DM_CREATE_TIME | DM_CREATE_DATE)) { /* NetWare seems not to allow this. I - do not know why. So, just tell the - user everything went fine. This is - a terrible hack, but I do not know - how to do this correctly. */ + do not know why. So, just tell the + user everything went fine. This is + a terrible hack, but I do not know + how to do this correctly. */ result = 0; } } } - - if ((attr->ia_valid & ATTR_SIZE) != 0) - { + if ((attr->ia_valid & ATTR_SIZE) != 0) { int written; DPRINTK("ncpfs: trying to change size of %s to %ld\n", NCP_ISTRUCT(inode)->entryName, attr->ia_size); - if ((result = ncp_make_open(inode, O_RDWR)) < 0) - { + if ((result = ncp_make_open(inode, O_RDWR)) < 0) { return -EACCES; } - ncp_write(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, attr->ia_size, 0, "", &written); @@ -516,54 +395,50 @@ ncp_notify_change(struct inode *inode, struct iattr *attr) result = 0; } - - ncp_invalid_dir_cache(NCP_INOP(inode)->dir->inode); + ncp_invalid_dir_cache(NCP_INOP(inode)->dir->inode); return result; } - + #ifdef DEBUG_NCP_MALLOC int ncp_malloced; int ncp_current_malloced; #endif -static struct file_system_type ncp_fs_type = { - ncp_read_super, "ncpfs", 0, NULL - }; +static struct file_system_type ncp_fs_type = +{ + ncp_read_super, "ncpfs", 0, NULL +}; -int init_ncp_fs(void) +__initfunc(int init_ncp_fs(void)) { - return register_filesystem(&ncp_fs_type); + return register_filesystem(&ncp_fs_type); } #ifdef MODULE -int -init_module( void) -{ - int status; +EXPORT_NO_SYMBOLS; - DPRINTK("ncpfs: init_module called\n"); +int init_module(void) +{ + DPRINTK("ncpfs: 
init_module called\n"); #ifdef DEBUG_NCP_MALLOC - ncp_malloced = 0; - ncp_current_malloced = 0; + ncp_malloced = 0; + ncp_current_malloced = 0; #endif - ncp_init_dir_cache(); + ncp_init_dir_cache(); - if ((status = init_ncp_fs()) == 0) - register_symtab(0); - return status; + return init_ncp_fs(); } -void -cleanup_module(void) +void cleanup_module(void) { - DPRINTK("ncpfs: cleanup_module called\n"); - ncp_free_dir_cache(); - unregister_filesystem(&ncp_fs_type); + DPRINTK("ncpfs: cleanup_module called\n"); + ncp_free_dir_cache(); + unregister_filesystem(&ncp_fs_type); #ifdef DEBUG_NCP_MALLOC - printk("ncp_malloced: %d\n", ncp_malloced); - printk("ncp_current_malloced: %d\n", ncp_current_malloced); + printk("ncp_malloced: %d\n", ncp_malloced); + printk("ncp_current_malloced: %d\n", ncp_current_malloced); #endif } diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index e7d8aa7db..9b88c3c9f 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -14,70 +14,41 @@ #include <linux/mm.h> #include <linux/ncp.h> -int -ncp_ioctl (struct inode * inode, struct file * filp, - unsigned int cmd, unsigned long arg) +int ncp_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) { int result; struct ncp_ioctl_request request; struct ncp_fs_info info; struct ncp_server *server = NCP_SERVER(inode); - /* - * Binary compatible with 1.3.XX releases. - * Take this out in 2.1.0 development series. 
- * <mec@duracef.shout.net> 12 Mar 1996 - */ - switch(cmd) { - case _IOR('n', 1, unsigned char *): - cmd = NCP_IOC_NCPREQUEST; - break; - case _IOR('u', 1, uid_t): - cmd = NCP_IOC_GETMOUNTUID; - break; - case _IO('l', 1): - cmd = NCP_IOC_CONN_LOGGED_IN; - break; - case _IOWR('i', 1, unsigned char *): - cmd = NCP_IOC_GET_FS_INFO; - break; - } - - switch(cmd) { + switch (cmd) { case NCP_IOC_NCPREQUEST: - if ( (permission(inode, MAY_WRITE) != 0) - && (current->uid != server->m.mounted_uid)) - { + if ((permission(inode, MAY_WRITE) != 0) + && (current->uid != server->m.mounted_uid)) { return -EACCES; } - - if ((result = verify_area(VERIFY_READ, (char *)arg, - sizeof(request))) != 0) - { + if ((result = verify_area(VERIFY_READ, (char *) arg, + sizeof(request))) != 0) { return result; } + copy_from_user(&request, (struct ncp_ioctl_request *) arg, + sizeof(request)); - copy_from_user(&request, (struct ncp_ioctl_request *)arg, - sizeof(request)); - - if ( (request.function > 255) + if ((request.function > 255) || (request.size > - NCP_PACKET_SIZE - sizeof(struct ncp_request_header))) - { + NCP_PACKET_SIZE - sizeof(struct ncp_request_header))) { return -EINVAL; } - - if ((result = verify_area(VERIFY_WRITE, (char *)request.data, - NCP_PACKET_SIZE)) != 0) - { + if ((result = verify_area(VERIFY_WRITE, (char *) request.data, + NCP_PACKET_SIZE)) != 0) { return result; } - ncp_lock_server(server); /* FIXME: We hack around in the server's structures - here to be able to use ncp_request */ + here to be able to use ncp_request */ server->has_subfunction = 0; server->current_size = request.size; @@ -95,66 +66,55 @@ ncp_ioctl (struct inode * inode, struct file * filp, case NCP_IOC_CONN_LOGGED_IN: - if ( (permission(inode, MAY_WRITE) != 0) - && (current->uid != server->m.mounted_uid)) - { + if ((permission(inode, MAY_WRITE) != 0) + && (current->uid != server->m.mounted_uid)) { return -EACCES; } - return ncp_conn_logged_in(server); - + case NCP_IOC_GET_FS_INFO: - if ( (permission(inode, 
MAY_WRITE) != 0) - && (current->uid != server->m.mounted_uid)) - { + if ((permission(inode, MAY_WRITE) != 0) + && (current->uid != server->m.mounted_uid)) { return -EACCES; } - - if ((result = verify_area(VERIFY_WRITE, (char *)arg, - sizeof(info))) != 0) - { + if ((result = verify_area(VERIFY_WRITE, (char *) arg, + sizeof(info))) != 0) { return result; } + copy_from_user(&info, (struct ncp_fs_info *) arg, + sizeof(info)); - copy_from_user(&info, (struct ncp_fs_info *)arg, - sizeof(info)); - - if (info.version != NCP_GET_FS_INFO_VERSION) - { + if (info.version != NCP_GET_FS_INFO_VERSION) { DPRINTK("info.version invalid: %d\n", info.version); return -EINVAL; } - - info.addr = server->m.serv_addr; - info.mounted_uid = server->m.mounted_uid; - info.connection = server->connection; - info.buffer_size = server->buffer_size; + /* TODO: info.addr = server->m.serv_addr; */ + info.mounted_uid = server->m.mounted_uid; + info.connection = server->connection; + info.buffer_size = server->buffer_size; info.volume_number = NCP_ISTRUCT(inode)->volNumber; - info.directory_id = NCP_ISTRUCT(inode)->DosDirNum; + info.directory_id = NCP_ISTRUCT(inode)->DosDirNum; - copy_to_user((struct ncp_fs_info *)arg, &info, sizeof(info)); - return 0; + copy_to_user((struct ncp_fs_info *) arg, &info, sizeof(info)); + return 0; - case NCP_IOC_GETMOUNTUID: + case NCP_IOC_GETMOUNTUID: - if ( (permission(inode, MAY_READ) != 0) - && (current->uid != server->m.mounted_uid)) - { + if ((permission(inode, MAY_READ) != 0) + && (current->uid != server->m.mounted_uid)) { return -EACCES; } - - if ((result = verify_area(VERIFY_WRITE, (uid_t*) arg, - sizeof(uid_t))) != 0) - { - return result; - } - put_user(server->m.mounted_uid, (uid_t *) arg); - return 0; + if ((result = verify_area(VERIFY_WRITE, (uid_t *) arg, + sizeof(uid_t))) != 0) { + return result; + } + put_user(server->m.mounted_uid, (uid_t *) arg); + return 0; default: return -EINVAL; } - + return -EINVAL; } diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c 
index c7b828dc6..52ff3c76a 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -23,17 +23,16 @@ static inline int min(int a, int b) { - return a<b ? a : b; + return a < b ? a : b; } /* * Fill in the supplied page for mmap */ -static unsigned long -ncp_file_mmap_nopage(struct vm_area_struct * area, - unsigned long address, int no_share) +static unsigned long ncp_file_mmap_nopage(struct vm_area_struct *area, + unsigned long address, int no_share) { - struct inode * inode = area->vm_inode; + struct inode *inode = area->vm_inode; unsigned long page; unsigned int clear; unsigned long tmp; @@ -48,37 +47,28 @@ ncp_file_mmap_nopage(struct vm_area_struct * area, pos = address - area->vm_start + area->vm_offset; clear = 0; - if (address + PAGE_SIZE > area->vm_end) - { + if (address + PAGE_SIZE > area->vm_end) { clear = address + PAGE_SIZE - area->vm_end; } - - /* what we can read in one go */ + /* what we can read in one go */ bufsize = NCP_SERVER(inode)->buffer_size; fs = get_fs(); set_fs(get_ds()); - if (ncp_make_open(inode, O_RDONLY) < 0) - { - clear = PAGE_SIZE; - } - else - { + if (ncp_make_open(inode, O_RDONLY) < 0) { + clear = PAGE_SIZE; + } else { int already_read = 0; int count = PAGE_SIZE - clear; int to_read; - while (already_read < count) - { + while (already_read < count) { int read_this_time; - if ((pos % bufsize) != 0) - { + if ((pos % bufsize) != 0) { to_read = bufsize - (pos % bufsize); - } - else - { + } else { to_read = bufsize; } @@ -87,33 +77,31 @@ ncp_file_mmap_nopage(struct vm_area_struct * area, if (ncp_read(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, pos, to_read, - (char *)(page + already_read), - &read_this_time) != 0) - { - read_this_time = 0; + (char *) (page + already_read), + &read_this_time) != 0) { + read_this_time = 0; } - pos += read_this_time; already_read += read_this_time; - if (read_this_time < to_read) - { + if (read_this_time < to_read) { break; } } - } + } set_fs(fs); tmp = page + PAGE_SIZE; while (clear--) { - *(char *)--tmp 
= 0; + *(char *) --tmp = 0; } return page; } -struct vm_operations_struct ncp_file_mmap = { +struct vm_operations_struct ncp_file_mmap = +{ NULL, /* open */ NULL, /* close */ NULL, /* unmap */ @@ -128,18 +116,15 @@ struct vm_operations_struct ncp_file_mmap = { /* This is used for a general mmap of a ncp file */ -int -ncp_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma) +int ncp_mmap(struct inode *inode, struct file *file, struct vm_area_struct *vma) { - DPRINTK("ncp_mmap: called\n"); + DPRINTK("ncp_mmap: called\n"); - if (!ncp_conn_valid(NCP_SERVER(inode))) - { + if (!ncp_conn_valid(NCP_SERVER(inode))) { return -EIO; } - - /* only PAGE_COW or read-only supported now */ - if (vma->vm_flags & VM_SHARED) + /* only PAGE_COW or read-only supported now */ + if (vma->vm_flags & VM_SHARED) return -EINVAL; if (!inode->i_sb || !S_ISREG(inode->i_mode)) return -EACCES; @@ -147,7 +132,6 @@ ncp_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma) inode->i_atime = CURRENT_TIME; inode->i_dirt = 1; } - vma->vm_inode = inode; inode->i_count++; vma->vm_ops = &ncp_file_mmap; diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index a0c6f5650..f7d4de4a1 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -2,58 +2,49 @@ * ncplib_kernel.c * * Copyright (C) 1995, 1996 by Volker Lendecke + * Modified for big endian by J.F. Chadima and David S. Miller * */ #include "ncplib_kernel.h" -typedef __u8 byte; -typedef __u16 word; -typedef __u32 dword; - static inline int min(int a, int b) { - return a<b ? a : b; + return a < b ? 
a : b; } -static void -assert_server_locked(struct ncp_server *server) +static void assert_server_locked(struct ncp_server *server) { - if (server->lock == 0) - { + if (server->lock == 0) { DPRINTK("ncpfs: server not locked!\n"); } } -static void -ncp_add_byte(struct ncp_server *server, byte x) +static void ncp_add_byte(struct ncp_server *server, __u8 x) { assert_server_locked(server); - *(byte *)(&(server->packet[server->current_size])) = x; + *(__u8 *) (&(server->packet[server->current_size])) = x; server->current_size += 1; return; } -static void -ncp_add_word(struct ncp_server *server, word x) +static void ncp_add_word(struct ncp_server *server, __u16 x) { assert_server_locked(server); - *(word *)(&(server->packet[server->current_size])) = x; + put_unaligned(x, (__u16 *) (&(server->packet[server->current_size]))); server->current_size += 2; return; } -static void -ncp_add_dword(struct ncp_server *server, dword x) +static void ncp_add_dword(struct ncp_server *server, __u32 x) { assert_server_locked(server); - *(dword *)(&(server->packet[server->current_size])) = x; + put_unaligned(x, (__u32 *) (&(server->packet[server->current_size]))); server->current_size += 4; return; } -static void -ncp_add_mem(struct ncp_server *server, const void *source, int size) +static void ncp_add_mem(struct ncp_server *server, const void *source, int size) { assert_server_locked(server); memcpy(&(server->packet[server->current_size]), source, size); @@ -61,8 +52,7 @@ ncp_add_mem(struct ncp_server *server, const void *source, int size) return; } -static void -ncp_add_mem_fromfs(struct ncp_server *server, const char *source, int size) +static void ncp_add_mem_fromfs(struct ncp_server *server, const char *source, int size) { assert_server_locked(server); copy_from_user(&(server->packet[server->current_size]), source, size); @@ -70,13 +60,11 @@ ncp_add_mem_fromfs(struct ncp_server *server, const char *source, int size) return; } -static void -ncp_add_pstring(struct ncp_server *server, 
const char *s) +static void ncp_add_pstring(struct ncp_server *server, const char *s) { int len = strlen(s); assert_server_locked(server); - if (len > 255) - { + if (len > 255) { DPRINTK("ncpfs: string too long: %s\n", s); len = 255; } @@ -85,8 +73,7 @@ ncp_add_pstring(struct ncp_server *server, const char *s) return; } -static void -ncp_init_request(struct ncp_server *server) +static void ncp_init_request(struct ncp_server *server) { ncp_lock_server(server); @@ -94,11 +81,10 @@ ncp_init_request(struct ncp_server *server) server->has_subfunction = 0; } -static void -ncp_init_request_s(struct ncp_server *server, int subfunction) +static void ncp_init_request_s(struct ncp_server *server, int subfunction) { ncp_init_request(server); - ncp_add_word(server, 0); /* preliminary size */ + ncp_add_word(server, 0); /* preliminary size */ ncp_add_byte(server, subfunction); @@ -106,53 +92,49 @@ ncp_init_request_s(struct ncp_server *server, int subfunction) } static char * -ncp_reply_data(struct ncp_server *server, int offset) + ncp_reply_data(struct ncp_server *server, int offset) { return &(server->packet[sizeof(struct ncp_reply_header) + offset]); } -static byte -ncp_reply_byte(struct ncp_server *server, int offset) +static __u8 + ncp_reply_byte(struct ncp_server *server, int offset) { - return *(byte *)(ncp_reply_data(server, offset)); + return get_unaligned((__u8 *) ncp_reply_data(server, offset)); } -static word -ncp_reply_word(struct ncp_server *server, int offset) +static __u16 + ncp_reply_word(struct ncp_server *server, int offset) { - return *(word *)(ncp_reply_data(server, offset)); + return get_unaligned((__u16 *) ncp_reply_data(server, offset)); } -static dword -ncp_reply_dword(struct ncp_server *server, int offset) +static __u32 + ncp_reply_dword(struct ncp_server *server, int offset) { - return *(dword *)(ncp_reply_data(server, offset)); + return get_unaligned((__u32 *) ncp_reply_data(server, offset)); } -int -ncp_negotiate_buffersize(struct ncp_server *server, - 
int size, int *target) +int ncp_negotiate_buffersize(struct ncp_server *server, + int size, int *target) { int result; ncp_init_request(server); ncp_add_word(server, htons(size)); - - if ((result = ncp_request(server, 33)) != 0) - { + + if ((result = ncp_request(server, 33)) != 0) { ncp_unlock_server(server); return result; } - - *target =min(ntohs(ncp_reply_word(server, 0)), size); + *target = min(ntohs(ncp_reply_word(server, 0)), size); ncp_unlock_server(server); return 0; } -int -ncp_get_volume_info_with_number(struct ncp_server *server, int n, - struct ncp_volume_info *target) +int ncp_get_volume_info_with_number(struct ncp_server *server, int n, + struct ncp_volume_info *target) { int result; int len; @@ -160,14 +142,12 @@ ncp_get_volume_info_with_number(struct ncp_server *server, int n, ncp_init_request_s(server, 44); ncp_add_byte(server, n); - if ((result = ncp_request(server, 22)) != 0) - { + if ((result = ncp_request(server, 22)) != 0) { ncp_unlock_server(server); return result; } - target->total_blocks = ncp_reply_dword(server, 0); - target->free_blocks = ncp_reply_dword(server, 4); + target->free_blocks = ncp_reply_dword(server, 4); target->purgeable_blocks = ncp_reply_dword(server, 8); target->not_yet_purgeable_blocks = ncp_reply_dword(server, 12); target->total_dir_entries = ncp_reply_dword(server, 16); @@ -177,20 +157,17 @@ ncp_get_volume_info_with_number(struct ncp_server *server, int n, memset(&(target->volume_name), 0, sizeof(target->volume_name)); len = ncp_reply_byte(server, 29); - if (len > NCP_VOLNAME_LEN) - { + if (len > NCP_VOLNAME_LEN) { DPRINTK("ncpfs: volume name too long: %d\n", len); ncp_unlock_server(server); return -EIO; } - memcpy(&(target->volume_name), ncp_reply_data(server, 30), len); ncp_unlock_server(server); return 0; } -int -ncp_close_file(struct ncp_server *server, const char *file_id) +int ncp_close_file(struct ncp_server *server, const char *file_id) { int result; @@ -203,35 +180,27 @@ ncp_close_file(struct ncp_server 
*server, const char *file_id) return result; } -static void -ncp_add_handle_path(struct ncp_server *server, - __u8 vol_num, - __u32 dir_base, int have_dir_base, - char *path) +static void ncp_add_handle_path(struct ncp_server *server, + __u8 vol_num, + __u32 dir_base, int have_dir_base, + char *path) { ncp_add_byte(server, vol_num); ncp_add_dword(server, dir_base); - if (have_dir_base != 0) - { - ncp_add_byte(server, 1); /* dir_base */ - } - else - { - ncp_add_byte(server, 0xff); /* no handle */ + if (have_dir_base != 0) { + ncp_add_byte(server, 1); /* dir_base */ + } else { + ncp_add_byte(server, 0xff); /* no handle */ } - if (path != NULL) - { - ncp_add_byte(server, 1); /* 1 component */ + if (path != NULL) { + ncp_add_byte(server, 1); /* 1 component */ ncp_add_pstring(server, path); - } - else - { + } else { ncp_add_byte(server, 0); } } -static void -ncp_extract_file_info(void *structure, struct nw_info_struct *target) +static void ncp_extract_file_info(void *structure, struct nw_info_struct *target) { __u8 *name_len; const int info_struct_size = sizeof(struct nw_info_struct) - 257; @@ -239,70 +208,60 @@ ncp_extract_file_info(void *structure, struct nw_info_struct *target) memcpy(target, structure, info_struct_size); name_len = structure + info_struct_size; target->nameLen = *name_len; - strncpy(target->entryName, name_len+1, *name_len); + strncpy(target->entryName, name_len + 1, *name_len); target->entryName[*name_len] = '\0'; return; } -int -ncp_obtain_info(struct ncp_server *server, - __u8 vol_num, __u32 dir_base, - char *path, /* At most 1 component */ - struct nw_info_struct *target) +int ncp_obtain_info(struct ncp_server *server, + __u8 vol_num, __u32 dir_base, + char *path, /* At most 1 component */ + struct nw_info_struct *target) { int result; - if (target == NULL) - { + if (target == NULL) { return -EINVAL; } - ncp_init_request(server); - ncp_add_byte(server, 6); /* subfunction */ + ncp_add_byte(server, 6); /* subfunction */ ncp_add_byte(server, 
server->name_space[vol_num]); ncp_add_byte(server, server->name_space[vol_num]); - ncp_add_word(server, 0xff); /* get all */ + ncp_add_word(server, htons(0xff00)); /* get all */ ncp_add_dword(server, RIM_ALL); ncp_add_handle_path(server, vol_num, dir_base, 1, path); - if ((result = ncp_request(server, 87)) != 0) - { + if ((result = ncp_request(server, 87)) != 0) { ncp_unlock_server(server); return result; } - ncp_extract_file_info(ncp_reply_data(server, 0), target); ncp_unlock_server(server); return 0; } -static inline int -ncp_has_os2_namespace(struct ncp_server *server, __u8 volume) +static inline int ncp_has_os2_namespace(struct ncp_server *server, __u8 volume) { int result; __u8 *namespace; __u16 no_namespaces; ncp_init_request(server); - ncp_add_byte(server, 24); /* Subfunction: Get Name Spaces Loaded */ + ncp_add_byte(server, 24); /* Subfunction: Get Name Spaces Loaded */ ncp_add_word(server, 0); ncp_add_byte(server, volume); - if ((result = ncp_request(server, 87)) != 0) - { + if ((result = ncp_request(server, 87)) != 0) { ncp_unlock_server(server); return 0; } - no_namespaces = ncp_reply_word(server, 0); namespace = ncp_reply_data(server, 2); - while (no_namespaces > 0) - { - DPRINTK("get_namespaces: found %d on %d\n", *namespace,volume); + while (no_namespaces > 0) { + DPRINTK("get_namespaces: found %d on %d\n", *namespace, volume); - if (*namespace == 4) - { + if (*namespace == 4) { DPRINTK("get_namespaces: found OS2\n"); ncp_unlock_server(server); return 1; @@ -314,10 +273,9 @@ ncp_has_os2_namespace(struct ncp_server *server, __u8 volume) return 0; } -int -ncp_lookup_volume(struct ncp_server *server, - char *volname, - struct nw_info_struct *target) +int ncp_lookup_volume(struct ncp_server *server, + char *volname, + struct nw_info_struct *target) { int result; int volnum; @@ -325,30 +283,28 @@ ncp_lookup_volume(struct ncp_server *server, DPRINTK("ncp_lookup_volume: looking up vol %s\n", volname); ncp_init_request(server); - ncp_add_byte(server, 22); /* 
Subfunction: Generate dir handle */ - ncp_add_byte(server, 0); /* DOS namespace */ - ncp_add_byte(server, 0); /* reserved */ - ncp_add_byte(server, 0); /* reserved */ - ncp_add_byte(server, 0); /* reserved */ - - ncp_add_byte(server, 0); /* faked volume number */ - ncp_add_dword(server, 0); /* faked dir_base */ - ncp_add_byte(server, 0xff); /* Don't have a dir_base */ - ncp_add_byte(server, 1); /* 1 path component */ + ncp_add_byte(server, 22); /* Subfunction: Generate dir handle */ + ncp_add_byte(server, 0); /* DOS namespace */ + ncp_add_byte(server, 0); /* reserved */ + ncp_add_byte(server, 0); /* reserved */ + ncp_add_byte(server, 0); /* reserved */ + + ncp_add_byte(server, 0); /* faked volume number */ + ncp_add_dword(server, 0); /* faked dir_base */ + ncp_add_byte(server, 0xff); /* Don't have a dir_base */ + ncp_add_byte(server, 1); /* 1 path component */ ncp_add_pstring(server, volname); - if ((result = ncp_request(server, 87)) != 0) - { + if ((result = ncp_request(server, 87)) != 0) { ncp_unlock_server(server); return result; } - memset(target, 0, sizeof(*target)); target->DosDirNum = target->dirEntNum = ncp_reply_dword(server, 4); target->volNumber = volnum = ncp_reply_byte(server, 8); ncp_unlock_server(server); - server->name_space[volnum] = ncp_has_os2_namespace(server,volnum)?4:0; + server->name_space[volnum] = ncp_has_os2_namespace(server, volnum) ? 
4 : 0; DPRINTK("lookup_vol: namespace[%d] = %d\n", volnum, server->name_space[volnum]); @@ -359,19 +315,18 @@ ncp_lookup_volume(struct ncp_server *server, return 0; } -int -ncp_modify_file_or_subdir_dos_info(struct ncp_server *server, - struct nw_info_struct *file, - __u32 info_mask, - struct nw_modify_dos_info *info) +int ncp_modify_file_or_subdir_dos_info(struct ncp_server *server, + struct nw_info_struct *file, + __u32 info_mask, + struct nw_modify_dos_info *info) { int result; ncp_init_request(server); - ncp_add_byte(server, 7); /* subfunction */ + ncp_add_byte(server, 7); /* subfunction */ ncp_add_byte(server, server->name_space[file->volNumber]); - ncp_add_byte(server, 0); /* reserved */ - ncp_add_word(server, 0x8006); /* search attribs: all */ + ncp_add_byte(server, 0); /* reserved */ + ncp_add_word(server, htons(0x0680)); /* search attribs: all */ ncp_add_dword(server, info_mask); ncp_add_mem(server, info, sizeof(*info)); @@ -383,55 +338,50 @@ ncp_modify_file_or_subdir_dos_info(struct ncp_server *server, return result; } -int -ncp_del_file_or_subdir(struct ncp_server *server, - struct nw_info_struct *dir, char *name) +int ncp_del_file_or_subdir(struct ncp_server *server, + struct nw_info_struct *dir, char *name) { int result; ncp_init_request(server); - ncp_add_byte(server, 8); /* subfunction */ + ncp_add_byte(server, 8); /* subfunction */ ncp_add_byte(server, server->name_space[dir->volNumber]); - ncp_add_byte(server, 0); /* reserved */ - ncp_add_word(server, 0x8006); /* search attribs: all */ + ncp_add_byte(server, 0); /* reserved */ + ncp_add_word(server, ntohs(0x0680)); /* search attribs: all */ ncp_add_handle_path(server, dir->volNumber, dir->dirEntNum, 1, name); - + result = ncp_request(server, 87); ncp_unlock_server(server); return result; } -static inline void -ConvertToNWfromDWORD ( __u32 sfd , __u8 ret[6] ) +static inline void ConvertToNWfromDWORD(__u32 sfd, __u8 ret[6]) { - __u16 *dest = (__u16 *) ret; - memcpy(&(dest[1]), &sfd, 4); - dest[0] = 
dest[1] + 1; - return; + __u16 *dest = (__u16 *) ret; + memcpy(ret + 2, &sfd, 4); + dest[0] = cpu_to_le16((le16_to_cpu(dest[1]) + le16_to_cpu(1))); + return; } /* If both dir and name are NULL, then in target there's already a looked-up entry that wants to be opened. */ -int -ncp_open_create_file_or_subdir(struct ncp_server *server, - struct nw_info_struct *dir, char *name, - int open_create_mode, - __u32 create_attributes, - int desired_acc_rights, - struct nw_file_info *target) +int ncp_open_create_file_or_subdir(struct ncp_server *server, + struct nw_info_struct *dir, char *name, + int open_create_mode, + __u32 create_attributes, + int desired_acc_rights, + struct nw_file_info *target) { int result; - __u16 search_attribs = 0x0006; + __u16 search_attribs = ntohs(0x0600); __u8 volume = (dir != NULL) ? dir->volNumber : target->i.volNumber; - if ((create_attributes & aDIR) != 0) - { - search_attribs |= 0x8000; -} - + if ((create_attributes & aDIR) != 0) { + search_attribs |= ntohs(0x0080); + } ncp_init_request(server); - ncp_add_byte(server, 1); /* subfunction */ + ncp_add_byte(server, 1); /* subfunction */ ncp_add_byte(server, server->name_space[volume]); ncp_add_byte(server, open_create_mode); ncp_add_word(server, search_attribs); @@ -441,89 +391,76 @@ ncp_open_create_file_or_subdir(struct ncp_server *server, for directories */ ncp_add_word(server, desired_acc_rights); - if (dir != NULL) - { + if (dir != NULL) { ncp_add_handle_path(server, volume, dir->dirEntNum, 1, name); - } - else - { + } else { ncp_add_handle_path(server, volume, target->i.dirEntNum, 1, NULL); - } + } - if ((result = ncp_request(server, 87)) != 0) - { + if ((result = ncp_request(server, 87)) != 0) { ncp_unlock_server(server); return result; } - target->opened = 1; target->server_file_handle = ncp_reply_dword(server, 0); target->open_create_action = ncp_reply_byte(server, 4); - if (dir != NULL) - { + if (dir != NULL) { /* in target there's a new finfo to fill */ 
ncp_extract_file_info(ncp_reply_data(server, 5), &(target->i)); } - ConvertToNWfromDWORD(target->server_file_handle, target->file_handle); ncp_unlock_server(server); return 0; } - -int -ncp_initialize_search(struct ncp_server *server, - struct nw_info_struct *dir, - struct nw_search_sequence *target) + +int ncp_initialize_search(struct ncp_server *server, + struct nw_info_struct *dir, + struct nw_search_sequence *target) { int result; ncp_init_request(server); - ncp_add_byte(server, 2); /* subfunction */ + ncp_add_byte(server, 2); /* subfunction */ ncp_add_byte(server, server->name_space[dir->volNumber]); - ncp_add_byte(server, 0); /* reserved */ + ncp_add_byte(server, 0); /* reserved */ ncp_add_handle_path(server, dir->volNumber, dir->dirEntNum, 1, NULL); - - if ((result = ncp_request(server, 87)) != 0) - { + + if ((result = ncp_request(server, 87)) != 0) { ncp_unlock_server(server); return result; } - memcpy(target, ncp_reply_data(server, 0), sizeof(*target)); ncp_unlock_server(server); return 0; } - + /* Search for everything */ -int -ncp_search_for_file_or_subdir(struct ncp_server *server, - struct nw_search_sequence *seq, - struct nw_info_struct *target) +int ncp_search_for_file_or_subdir(struct ncp_server *server, + struct nw_search_sequence *seq, + struct nw_info_struct *target) { int result; ncp_init_request(server); - ncp_add_byte(server, 3); /* subfunction */ + ncp_add_byte(server, 3); /* subfunction */ ncp_add_byte(server, server->name_space[seq->volNumber]); - ncp_add_byte(server, 0); /* data stream (???) */ - ncp_add_word(server, 0xffff); /* Search attribs */ - ncp_add_dword(server, RIM_ALL); /* return info mask */ + ncp_add_byte(server, 0); /* data stream (???) 
*/ + ncp_add_word(server, 0xffff); /* Search attribs */ + ncp_add_dword(server, RIM_ALL); /* return info mask */ ncp_add_mem(server, seq, 9); - ncp_add_byte(server, 2); /* 2 byte pattern */ - ncp_add_byte(server, 0xff); /* following is a wildcard */ + ncp_add_byte(server, 2); /* 2 byte pattern */ + ncp_add_byte(server, 0xff); /* following is a wildcard */ ncp_add_byte(server, '*'); - - if ((result = ncp_request(server, 87)) != 0) - { + + if ((result = ncp_request(server, 87)) != 0) { ncp_unlock_server(server); return result; } - memcpy(seq, ncp_reply_data(server, 0), sizeof(*seq)); ncp_extract_file_info(ncp_reply_data(server, 10), target); @@ -531,34 +468,33 @@ ncp_search_for_file_or_subdir(struct ncp_server *server, return 0; } -int -ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, - struct nw_info_struct *old_dir, char *old_name, - struct nw_info_struct *new_dir, char *new_name) +int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, + struct nw_info_struct *old_dir, char *old_name, + struct nw_info_struct *new_dir, char *new_name) { int result; - if ( (old_dir == NULL) || (old_name == NULL) + if ((old_dir == NULL) || (old_name == NULL) || (new_dir == NULL) || (new_name == NULL)) return -EINVAL; - + ncp_init_request(server); - ncp_add_byte(server, 4); /* subfunction */ + ncp_add_byte(server, 4); /* subfunction */ ncp_add_byte(server, server->name_space[old_dir->volNumber]); - ncp_add_byte(server, 1); /* rename flag */ - ncp_add_word(server, 0x8006); /* search attributes */ + ncp_add_byte(server, 1); /* rename flag */ + ncp_add_word(server, ntohs(0x0680)); /* search attributes */ /* source Handle Path */ ncp_add_byte(server, old_dir->volNumber); ncp_add_dword(server, old_dir->dirEntNum); ncp_add_byte(server, 1); - ncp_add_byte(server, 1); /* 1 source component */ + ncp_add_byte(server, 1); /* 1 source component */ /* dest Handle Path */ ncp_add_byte(server, new_dir->volNumber); ncp_add_dword(server, new_dir->dirEntNum); ncp_add_byte(server, 1); - 
ncp_add_byte(server, 1); /* 1 destination component */ + ncp_add_byte(server, 1); /* 1 destination component */ /* source path string */ ncp_add_pstring(server, old_name); @@ -569,13 +505,12 @@ ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, ncp_unlock_server(server); return result; } - + /* We have to transfer to/from user space */ -int -ncp_read(struct ncp_server *server, const char *file_id, - __u32 offset, __u16 to_read, - char *target, int *bytes_read) +int ncp_read(struct ncp_server *server, const char *file_id, + __u32 offset, __u16 to_read, + char *target, int *bytes_read) { int result; @@ -585,24 +520,21 @@ ncp_read(struct ncp_server *server, const char *file_id, ncp_add_dword(server, htonl(offset)); ncp_add_word(server, htons(to_read)); - if ((result = ncp_request(server, 72)) != 0) - { + if ((result = ncp_request(server, 72)) != 0) { ncp_unlock_server(server); return result; } - *bytes_read = ntohs(ncp_reply_word(server, 0)); - copy_to_user(target, ncp_reply_data(server, 2+(offset&1)), *bytes_read); + copy_to_user(target, ncp_reply_data(server, 2 + (offset & 1)), *bytes_read); ncp_unlock_server(server); return 0; } -int -ncp_write(struct ncp_server *server, const char *file_id, - __u32 offset, __u16 to_write, - const char *source, int *bytes_written) +int ncp_write(struct ncp_server *server, const char *file_id, + __u32 offset, __u16 to_write, + const char *source, int *bytes_written) { int result; @@ -613,15 +545,12 @@ ncp_write(struct ncp_server *server, const char *file_id, ncp_add_word(server, htons(to_write)); ncp_add_mem_fromfs(server, source, to_write); - if ((result = ncp_request(server, 73)) != 0) - { + if ((result = ncp_request(server, 73)) != 0) { ncp_unlock_server(server); return result; } - *bytes_written = to_write; ncp_unlock_server(server); return 0; } - diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index f76c2c666..48425adb5 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -2,6 +2,7 @@ * 
ncplib_kernel.h * * Copyright (C) 1995, 1996 by Volker Lendecke + * Modified for big endian by J.F. Chadima and David S. Miller * */ @@ -18,6 +19,8 @@ #include <linux/stat.h> #include <linux/fcntl.h> #include <asm/uaccess.h> +#include <asm/byteorder.h> +#include <asm/unaligned.h> #include <asm/string.h> #include <linux/ncp.h> @@ -26,91 +29,13 @@ int ncp_negotiate_buffersize(struct ncp_server *server, int size, int *target); int -ncp_get_encryption_key(struct ncp_server *server, - char *target); -int -ncp_get_bindery_object_id(struct ncp_server *server, - int object_type, char *object_name, - struct ncp_bindery_object *target); -int -ncp_login_encrypted(struct ncp_server *server, - struct ncp_bindery_object *object, - unsigned char *key, - unsigned char *passwd); -int -ncp_login_user(struct ncp_server *server, - unsigned char *username, - unsigned char *password); -int ncp_get_volume_info_with_number(struct ncp_server *server, int n, struct ncp_volume_info *target); int -ncp_get_volume_number(struct ncp_server *server, const char *name, - int *target); - -int -ncp_file_search_init(struct ncp_server *server, - int dir_handle, const char *path, - struct ncp_filesearch_info *target); - -int -ncp_file_search_continue(struct ncp_server *server, - struct ncp_filesearch_info *fsinfo, - int attributes, const char *path, - struct ncp_file_info *target); - -int -ncp_get_finfo(struct ncp_server *server, - int dir_handle, const char *path, const char *name, - struct ncp_file_info *target); - -int -ncp_open_file(struct ncp_server *server, - int dir_handle, const char *path, - int attr, int access, - struct ncp_file_info *target); -int ncp_close_file(struct ncp_server *server, const char *file_id); int -ncp_create_newfile(struct ncp_server *server, - int dir_handle, const char *path, - int attr, - struct ncp_file_info *target); - -int -ncp_create_file(struct ncp_server *server, - int dir_handle, const char *path, - int attr, - struct ncp_file_info *target); - -int 
-ncp_erase_file(struct ncp_server *server, - int dir_handle, const char *path, - int attr); - -int -ncp_rename_file(struct ncp_server *server, - int old_handle, const char *old_path, - int attr, - int new_handle, const char *new_path); - -int -ncp_create_directory(struct ncp_server *server, - int dir_handle, const char *path, - int inherit_mask); - -int -ncp_delete_directory(struct ncp_server *server, - int dir_handle, const char *path); - -int -ncp_rename_directory(struct ncp_server *server, - int dir_handle, - const char *old_path, const char *new_path); - -int ncp_read(struct ncp_server *server, const char *file_id, __u32 offset, __u16 to_read, char *target, int *bytes_read); diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index b8017a9d2..19b42890d 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -18,304 +18,66 @@ #include <linux/net.h> #include <linux/mm.h> #include <linux/netdevice.h> +#include <net/scm.h> #include <linux/ipx.h> #include <linux/ncp.h> #include <linux/ncp_fs.h> #include <linux/ncp_fs_sb.h> #include <net/sock.h> +#include <linux/poll.h> - -#define _S(nr) (1<<((nr)-1)) -static int _recvfrom(struct socket *sock, unsigned char *ubuf, - int size, int noblock, unsigned flags, - struct sockaddr_ipx *sa, int *addr_len) +static int _recv(struct socket *sock, unsigned char *ubuf, int size, + unsigned flags) { - struct iovec iov; - struct msghdr msg; - - iov.iov_base = ubuf; - iov.iov_len = size; - - msg.msg_name = (void *)sa; - msg.msg_namelen = 0; - if (addr_len) - msg.msg_namelen = *addr_len; - msg.msg_control = NULL; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - return sock->ops->recvmsg(sock, &msg, size, noblock, flags, addr_len); -} + struct iovec iov; + struct msghdr msg; + struct scm_cookie scm; -static int _sendto(struct socket *sock, const void *buff, - int len, int nonblock, unsigned flags, - struct sockaddr_ipx *sa, int addr_len) + memset(&scm, 0, sizeof(scm)); -{ - struct iovec iov; - struct msghdr msg; - - iov.iov_base = (void *)buff; 
- iov.iov_len = len; - - msg.msg_name = (void *)sa; - msg.msg_namelen = addr_len; - msg.msg_control = NULL; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - return sock->ops->sendmsg(sock, &msg, len, nonblock, flags); -} + iov.iov_base = ubuf; + iov.iov_len = size; - -static void -ncp_wdog_data_ready(struct sock *sk, int len) -{ - struct socket *sock = sk->socket; - - if (!sk->dead) - { - unsigned char packet_buf[2]; - struct sockaddr_ipx sender; - int addr_len = sizeof(struct sockaddr_ipx); - int result; - unsigned short fs; - - fs = get_fs(); - set_fs(get_ds()); - - result = _recvfrom(sock, (void *)packet_buf, 2, 1, 0, - &sender, &addr_len); - - if ( (result != 2) - || (packet_buf[1] != '?') - /* How to check connection number here? */ - ) - { - printk("ncpfs: got strange packet on watchdog " - "socket\n"); - } - else - { - int result; - DDPRINTK("ncpfs: got watchdog from:\n"); - DDPRINTK("ncpfs: %08lX:%02X%02X%02X%02X%02X%02X:%04X," - " conn:%02X,type:%c\n", - htonl(sender.sipx_network), - sender.sipx_node[0], sender.sipx_node[1], - sender.sipx_node[2], sender.sipx_node[3], - sender.sipx_node[4], sender.sipx_node[5], - ntohs(sender.sipx_port), - packet_buf[0], packet_buf[1]); - - packet_buf[1] = 'Y'; - result = _sendto(sock, (void *)packet_buf, 2, 1, 0, - &sender, sizeof(sender)); - DDPRINTK("send result: %d\n", result); - } - set_fs(fs); - } -} - -int -ncp_catch_watchdog(struct ncp_server *server) -{ - struct file *file; - struct inode *inode; - struct socket *sock; - struct sock *sk; - - if ( (server == NULL) - || ((file = server->wdog_filp) == NULL) - || ((inode = file->f_inode) == NULL) - || (!S_ISSOCK(inode->i_mode))) - { - printk("ncp_catch_watchdog: did not get valid server!\n"); - server->data_ready = NULL; - return -EINVAL; - } - - sock = &(inode->u.socket_i); - - if (sock->type != SOCK_DGRAM) - { - printk("ncp_catch_watchdog: did not get SOCK_DGRAM\n"); - server->data_ready = NULL; - return -EINVAL; - } - - sk = (struct sock *)(sock->data); - - if (sk 
== NULL) - { - printk("ncp_catch_watchdog: sk == NULL"); - server->data_ready = NULL; - return -EINVAL; - } - - DDPRINTK("ncp_catch_watchdog: sk->d_r = %x, server->d_r = %x\n", - (unsigned int)(sk->data_ready), - (unsigned int)(server->data_ready)); - - if (sk->data_ready == ncp_wdog_data_ready) - { - printk("ncp_catch_watchdog: already done\n"); - return -EINVAL; - } - - server->data_ready = sk->data_ready; - sk->data_ready = ncp_wdog_data_ready; - sk->allocation = GFP_ATOMIC; - return 0; -} - -int -ncp_dont_catch_watchdog(struct ncp_server *server) -{ - struct file *file; - struct inode *inode; - struct socket *sock; - struct sock *sk; - - if ( (server == NULL) - || ((file = server->wdog_filp) == NULL) - || ((inode = file->f_inode) == NULL) - || (!S_ISSOCK(inode->i_mode))) - { - printk("ncp_dont_catch_watchdog: " - "did not get valid server!\n"); - return -EINVAL; - } - - sock = &(inode->u.socket_i); - - if (sock->type != SOCK_DGRAM) - { - printk("ncp_dont_catch_watchdog: did not get SOCK_DGRAM\n"); - return -EINVAL; - } - - sk = (struct sock *)(sock->data); - - if (sk == NULL) - { - printk("ncp_dont_catch_watchdog: sk == NULL"); - return -EINVAL; - } - - if (server->data_ready == NULL) - { - printk("ncp_dont_catch_watchdog: " - "server->data_ready == NULL\n"); - return -EINVAL; - } - - if (sk->data_ready != ncp_wdog_data_ready) - { - printk("ncp_dont_catch_watchdog: " - "sk->data_callback != ncp_data_callback\n"); - return -EINVAL; - } - - DDPRINTK("ncp_dont_catch_watchdog: sk->d_r = %x, server->d_r = %x\n", - (unsigned int)(sk->data_ready), - (unsigned int)(server->data_ready)); - - sk->data_ready = server->data_ready; - sk->allocation = GFP_KERNEL; - server->data_ready = NULL; - return 0; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + return sock->ops->recvmsg(sock, &msg, size, flags, &scm); } -static void -ncp_msg_data_ready(struct sock *sk, int len) +static int _send(struct socket *sock, 
const void *buff, int len) { - struct socket *sock = sk->socket; - - if (!sk->dead) - { - unsigned char packet_buf[2]; - struct sockaddr_ipx sender; - int addr_len = sizeof(struct sockaddr_ipx); - int result; - unsigned short fs; - - fs = get_fs(); - set_fs(get_ds()); - - result = _recvfrom(sock, (void *)packet_buf, 2, 1, 0, - &sender, &addr_len); - - DPRINTK("ncpfs: got message of size %d from:\n", result); - DPRINTK("ncpfs: %08lX:%02X%02X%02X%02X%02X%02X:%04X," - " conn:%02X,type:%c\n", - htonl(sender.sipx_network), - sender.sipx_node[0], sender.sipx_node[1], - sender.sipx_node[2], sender.sipx_node[3], - sender.sipx_node[4], sender.sipx_node[5], - ntohs(sender.sipx_port), - packet_buf[0], packet_buf[1]); - - ncp_trigger_message(sk->protinfo.af_ipx.ncp_server); - - set_fs(fs); + struct iovec iov; + struct msghdr msg; + struct scm_cookie scm; + int err; + + iov.iov_base = (void *) buff; + iov.iov_len = len; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_flags = 0; + + err = scm_send(sock, &msg, &scm); + if (err < 0) { + return err; } + err = sock->ops->sendmsg(sock, &msg, len, &scm); + scm_destroy(&scm); + return err; } -int -ncp_catch_message(struct ncp_server *server) -{ - struct file *file; - struct inode *inode; - struct socket *sock; - struct sock *sk; - - if ( (server == NULL) - || ((file = server->msg_filp) == NULL) - || ((inode = file->f_inode) == NULL) - || (!S_ISSOCK(inode->i_mode))) - { - printk("ncp_catch_message: did not get valid server!\n"); - return -EINVAL; - } - - sock = &(inode->u.socket_i); - - if (sock->type != SOCK_DGRAM) - { - printk("ncp_catch_message: did not get SOCK_DGRAM\n"); - return -EINVAL; - } - - sk = (struct sock *)(sock->data); - - if (sk == NULL) - { - printk("ncp_catch_message: sk == NULL"); - return -EINVAL; - } - - DDPRINTK("ncp_catch_message: sk->d_r = %x\n", - (unsigned int)(sk->data_ready)); - - if (sk->data_ready == ncp_msg_data_ready) - { - 
printk("ncp_catch_message: already done\n"); - return -EINVAL; - } - - sk->data_ready = ncp_msg_data_ready; - sk->protinfo.af_ipx.ncp_server = server; - return 0; -} - #define NCP_SLACK_SPACE 1024 #define _S(nr) (1<<((nr)-1)) -static int -do_ncp_rpc_call(struct ncp_server *server, int size) +static int do_ncp_rpc_call(struct ncp_server *server, int size) { struct file *file; struct inode *inode; @@ -323,32 +85,26 @@ do_ncp_rpc_call(struct ncp_server *server, int size) unsigned short fs; int result; char *start = server->packet; - select_table wait_table; - struct select_table_entry entry; - int (*select) (struct inode *, struct file *, int, select_table *); + poll_table wait_table; + struct poll_table_entry entry; int init_timeout, max_timeout; int timeout; int retrans; int major_timeout_seen; int acknowledge_seen; - char *server_name; int n; - int addrlen; unsigned long old_mask; /* We have to check the result, so store the complete header */ struct ncp_request_header request = - *((struct ncp_request_header *)(server->packet)); - - struct ncp_reply_header reply; + *((struct ncp_request_header *) (server->packet)); + struct ncp_reply_header reply; file = server->ncp_filp; inode = file->f_inode; - select = file->f_op->select; sock = &inode->u.socket_i; - if (!sock) - { + if (!sock) { printk("ncp_rpc_call: socki_lookup failed\n"); return -EBADF; } @@ -357,63 +113,53 @@ do_ncp_rpc_call(struct ncp_server *server, int size) retrans = server->m.retry_count; major_timeout_seen = 0; acknowledge_seen = 0; - server_name = server->m.server_name; old_mask = current->blocked; current->blocked |= ~(_S(SIGKILL) #if 0 - | _S(SIGSTOP) + | _S(SIGSTOP) #endif - | ((server->m.flags & NCP_MOUNT_INTR) - ? ((current->sig->action[SIGINT - 1].sa_handler == SIG_DFL - ? _S(SIGINT) : 0) - | (current->sig->action[SIGQUIT - 1].sa_handler == SIG_DFL - ? _S(SIGQUIT) : 0)) - : 0)); + | ((server->m.flags & NCP_MOUNT_INTR) + ? ((current->sig->action[SIGINT - 1].sa_handler == SIG_DFL + ? 
_S(SIGINT) : 0) + | (current->sig->action[SIGQUIT - 1].sa_handler == SIG_DFL + ? _S(SIGQUIT) : 0)) + : 0)); fs = get_fs(); set_fs(get_ds()); - for (n = 0, timeout = init_timeout; ; n++, timeout <<= 1) - { + for (n = 0, timeout = init_timeout;; n++, timeout <<= 1) { DDPRINTK("ncpfs: %08lX:%02X%02X%02X%02X%02X%02X:%04X\n", - htonl(server->m.serv_addr.sipx_network), - server->m.serv_addr.sipx_node[0], - server->m.serv_addr.sipx_node[1], - server->m.serv_addr.sipx_node[2], - server->m.serv_addr.sipx_node[3], - server->m.serv_addr.sipx_node[4], - server->m.serv_addr.sipx_node[5], - ntohs(server->m.serv_addr.sipx_port)); + htonl(server->m.serv_addr.sipx_network), + server->m.serv_addr.sipx_node[0], + server->m.serv_addr.sipx_node[1], + server->m.serv_addr.sipx_node[2], + server->m.serv_addr.sipx_node[3], + server->m.serv_addr.sipx_node[4], + server->m.serv_addr.sipx_node[5], + ntohs(server->m.serv_addr.sipx_port)); DDPRINTK("ncpfs: req.typ: %04X, con: %d, " - "seq: %d", - request.type, - (request.conn_high << 8) + request.conn_low, - request.sequence); + "seq: %d", + request.type, + (request.conn_high << 8) + request.conn_low, + request.sequence); DDPRINTK(" func: %d\n", request.function); - result = _sendto(sock, (void *) start, size, 0, 0, - &(server->m.serv_addr), - sizeof(server->m.serv_addr)); - if (result < 0) - { + result = _send(sock, (void *) start, size); + if (result < 0) { printk("ncp_rpc_call: send error = %d\n", result); break; } - re_select: + re_select: wait_table.nr = 0; wait_table.entry = &entry; current->state = TASK_INTERRUPTIBLE; - if ( !select(inode, file, SEL_IN, &wait_table) - && !select(inode, file, SEL_IN, NULL)) - { - if (timeout > max_timeout) - { + if (!(file->f_op->poll(file, &wait_table) & POLLIN)) { + if (timeout > max_timeout) { /* JEJB/JSP 2/7/94 * This is useful to see if the system is * hanging */ - if (acknowledge_seen == 0) - { - printk("NCP max timeout reached on " - "%s\n", server_name); + if (acknowledge_seen == 0) { + printk("NCP 
max timeout\n"); } timeout = max_timeout; } @@ -421,105 +167,87 @@ do_ncp_rpc_call(struct ncp_server *server, int size) schedule(); remove_wait_queue(entry.wait_address, &entry.wait); current->state = TASK_RUNNING; - if (current->signal & ~current->blocked) - { + if (current->signal & ~current->blocked) { current->timeout = 0; result = -ERESTARTSYS; break; } - if (!current->timeout) - { + if (!current->timeout) { if (n < retrans) continue; - if (server->m.flags & NCP_MOUNT_SOFT) - { - printk("NCP server %s not responding, " - "timed out\n", server_name); + if (server->m.flags & NCP_MOUNT_SOFT) { + printk("NCP server not responding\n"); result = -EIO; break; } n = 0; timeout = init_timeout; init_timeout <<= 1; - if (!major_timeout_seen) - { - printk("NCP server %s not responding, " - "still trying\n", server_name); + if (!major_timeout_seen) { + printk("NCP server not responding\n"); } major_timeout_seen = 1; continue; - } - else + } else current->timeout = 0; - } - else if (wait_table.nr) + } else if (wait_table.nr) remove_wait_queue(entry.wait_address, &entry.wait); current->state = TASK_RUNNING; - addrlen = 0; /* Get the header from the next packet using a peek, so keep it * on the recv queue. 
If it is wrong, it will be some reply * we don't now need, so discard it */ - result = _recvfrom(sock, (void *)&reply, - sizeof(reply), 1, MSG_PEEK, - NULL, &addrlen); - if (result < 0) - { - if (result == -EAGAIN) - { + result = _recv(sock, (void *) &reply, sizeof(reply), + MSG_PEEK | MSG_DONTWAIT); + if (result < 0) { + if (result == -EAGAIN) { DPRINTK("ncp_rpc_call: bad select ready\n"); goto re_select; } - if (result == -ECONNREFUSED) - { + if (result == -ECONNREFUSED) { DPRINTK("ncp_rpc_call: server playing coy\n"); goto re_select; } - if (result != -ERESTARTSYS) - { + if (result != -ERESTARTSYS) { printk("ncp_rpc_call: recv error = %d\n", - -result); + -result); } break; } - if ( (result == sizeof(reply)) - && (reply.type == NCP_POSITIVE_ACK)) - { + if ((result == sizeof(reply)) + && (reply.type == NCP_POSITIVE_ACK)) { /* Throw away the packet */ DPRINTK("ncp_rpc_call: got positive acknowledge\n"); - _recvfrom(sock, (void *)&reply, sizeof(reply), 1, 0, - NULL, &addrlen); + _recv(sock, (void *) &reply, sizeof(reply), + MSG_DONTWAIT); n = 0; timeout = max_timeout; acknowledge_seen = 1; goto re_select; } - DDPRINTK("ncpfs: rep.typ: %04X, con: %d, tsk: %d," - "seq: %d\n", - reply.type, - (reply.conn_high << 8) + reply.conn_low, - reply.task, - reply.sequence); - - if ( (result >= sizeof(reply)) - && (reply.type == NCP_REPLY) - && ( (request.type == NCP_ALLOC_SLOT_REQUEST) - || ( (reply.sequence == request.sequence) - && (reply.conn_low == request.conn_low) -/* seem to get wrong task from NW311 && (reply.task == request.task)*/ - && (reply.conn_high == request.conn_high)))) - { + "seq: %d\n", + reply.type, + (reply.conn_high << 8) + reply.conn_low, + reply.task, + reply.sequence); + + if ((result >= sizeof(reply)) + && (reply.type == NCP_REPLY) + && ((request.type == NCP_ALLOC_SLOT_REQUEST) + || ((reply.sequence == request.sequence) + && (reply.conn_low == request.conn_low) +/* seem to get wrong task from NW311 && (reply.task == request.task) */ + && 
(reply.conn_high == request.conn_high)))) { if (major_timeout_seen) - printk("NCP server %s OK\n", server_name); + printk("NCP server OK\n"); break; } /* JEJB/JSP 2/7/94 * we have xid mismatch, so discard the packet and start * again. What a hack! but I can't call recvfrom with * a null buffer yet. */ - _recvfrom(sock, (void *)&reply, sizeof(reply), 1, 0, NULL, - &addrlen); + _recv(sock, (void *) &reply, sizeof(reply), MSG_DONTWAIT); DPRINTK("ncp_rpc_call: reply mismatch\n"); goto re_select; @@ -528,54 +256,42 @@ do_ncp_rpc_call(struct ncp_server *server, int size) * we have the correct reply, so read into the correct place and * return it */ - result = _recvfrom(sock, (void *)start, server->packet_size, - 1, 0, NULL, &addrlen); - if (result < 0) - { + result = _recv(sock, (void *) start, server->packet_size, MSG_DONTWAIT); + if (result < 0) { printk("NCP: notice message: result=%d\n", result); - } - else if (result < sizeof(struct ncp_reply_header)) - { + } else if (result < sizeof(struct ncp_reply_header)) { printk("NCP: just caught a too small read memory size..., " "email to NET channel\n"); - printk("NCP: result=%d,addrlen=%d\n", result, addrlen); + printk("NCP: result=%d\n", result); result = -EIO; } - current->blocked = old_mask; set_fs(fs); return result; } - /* * We need the server to be locked here, so check! */ -static int -ncp_do_request(struct ncp_server *server, int size) +static int ncp_do_request(struct ncp_server *server, int size) { int result; - if (server->lock == 0) - { + if (server->lock == 0) { printk("ncpfs: Server not locked!\n"); return -EIO; } - - if (!ncp_conn_valid(server)) - { + if (!ncp_conn_valid(server)) { return -EIO; } - result = do_ncp_rpc_call(server, size); DDPRINTK("do_ncp_rpc_call returned %d\n", result); - if (result < 0) - { + if (result < 0) { /* There was a problem with I/O, so the connections is - * no longer usable. */ + * no longer usable. 
*/ ncp_invalidate_conn(server); } return result; @@ -584,121 +300,105 @@ ncp_do_request(struct ncp_server *server, int size) /* ncp_do_request assures that at least a complete reply header is * received. It assumes that server->current_size contains the ncp * request size */ -int -ncp_request(struct ncp_server *server, int function) +int ncp_request(struct ncp_server *server, int function) { struct ncp_request_header *h - = (struct ncp_request_header *)(server->packet); + = (struct ncp_request_header *) (server->packet); struct ncp_reply_header *reply - = (struct ncp_reply_header *)(server->packet); + = (struct ncp_reply_header *) (server->packet); int request_size = server->current_size - - sizeof(struct ncp_request_header); + - sizeof(struct ncp_request_header); int result; - if (server->has_subfunction != 0) - { - *(__u16 *)&(h->data[0]) = htons(request_size - 2); + if (server->has_subfunction != 0) { + *(__u16 *) & (h->data[0]) = htons(request_size - 2); } - h->type = NCP_REQUEST; - + server->sequence += 1; - h->sequence = server->sequence; - h->conn_low = (server->connection) & 0xff; + h->sequence = server->sequence; + h->conn_low = (server->connection) & 0xff; h->conn_high = ((server->connection) & 0xff00) >> 8; - h->task = (current->pid) & 0xff; - h->function = function; + h->task = (current->pid) & 0xff; + h->function = function; - if ((result = ncp_do_request(server, request_size + sizeof(*h))) < 0) - { + if ((result = ncp_do_request(server, request_size + sizeof(*h))) < 0) { DPRINTK("ncp_request_error: %d\n", result); return result; } - - server->completion = reply->completion_code; + server->completion = reply->completion_code; server->conn_status = reply->connection_state; - server->reply_size = result; + server->reply_size = result; server->ncp_reply_size = result - sizeof(struct ncp_reply_header); result = reply->completion_code; - if (result != 0) - { + if (result != 0) { DPRINTK("ncp_completion_code: %x\n", result); } - return result; + return 
result; } -int -ncp_connect(struct ncp_server *server) +int ncp_connect(struct ncp_server *server) { struct ncp_request_header *h - = (struct ncp_request_header *)(server->packet); + = (struct ncp_request_header *) (server->packet); int result; h->type = NCP_ALLOC_SLOT_REQUEST; - + server->sequence = 0; - h->sequence = server->sequence; - h->conn_low = 0xff; + h->sequence = server->sequence; + h->conn_low = 0xff; h->conn_high = 0xff; - h->task = (current->pid) & 0xff; - h->function = 0; + h->task = (current->pid) & 0xff; + h->function = 0; - if ((result = ncp_do_request(server, sizeof(*h))) < 0) - { + if ((result = ncp_do_request(server, sizeof(*h))) < 0) { return result; } - server->sequence = 0; server->connection = h->conn_low + (h->conn_high * 256); return 0; } - -int -ncp_disconnect(struct ncp_server *server) + +int ncp_disconnect(struct ncp_server *server) { struct ncp_request_header *h - = (struct ncp_request_header *)(server->packet); + = (struct ncp_request_header *) (server->packet); h->type = NCP_DEALLOC_SLOT_REQUEST; - + server->sequence += 1; - h->sequence = server->sequence; - h->conn_low = (server->connection) & 0xff; + h->sequence = server->sequence; + h->conn_low = (server->connection) & 0xff; h->conn_high = ((server->connection) & 0xff00) >> 8; - h->task = (current->pid) & 0xff; - h->function = 0; + h->task = (current->pid) & 0xff; + h->function = 0; return ncp_do_request(server, sizeof(*h)); } -void -ncp_lock_server(struct ncp_server *server) +void ncp_lock_server(struct ncp_server *server) { #if 0 /* For testing, only 1 process */ - if (server->lock != 0) - { + if (server->lock != 0) { DPRINTK("ncpfs: server locked!!!\n"); } #endif - while (server->lock) + while (server->lock) sleep_on(&server->wait); server->lock = 1; } -void -ncp_unlock_server(struct ncp_server *server) +void ncp_unlock_server(struct ncp_server *server) { - if (server->lock != 1) - { - printk("ncp_unlock_server: was not locked!\n"); - } - - server->lock = 0; - 
wake_up(&server->wait); + if (server->lock != 1) { + printk("ncp_unlock_server: was not locked!\n"); + } + server->lock = 0; + wake_up(&server->wait); } - diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index a7612d20c..df7fe151d 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -8,11 +8,11 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := nfs.o -O_OBJS := proc.o sock.o rpcsock.o inode.o file.o bio.o \ - nfsiod.o dir.o symlink.o +O_OBJS := inode.o file.o read.o write.o dir.o symlink.o proc.o \ + nfs2xdr.o ifdef CONFIG_ROOT_NFS -O_OBJS += nfsroot.o + O_OBJS += nfsroot.o mount_clnt.o endif M_OBJS := $(O_TARGET) diff --git a/fs/nfs/README b/fs/nfs/README deleted file mode 100644 index f87cb3629..000000000 --- a/fs/nfs/README +++ /dev/null @@ -1,114 +0,0 @@ - - - This is an NFS client for Linux that supports async RPC calls for - read-ahead (and hopefully soon, write-back) on regular files. - - The implementation uses a straightforward nfsiod scheme. After - trying out a number of different concepts, I finally got back to - this concept, because everything else either didn't work or gave me - headaches. It's not flashy, but it works without hacking into any - other regions of the kernel. - - - HOW TO USE - - This stuff compiles as a loadable module (I developed it on 1.3.77). - Simply type mkmodule, and insmod nfs.o. This will start four nfsiod's - at the same time (which will show up under the pseudonym of insmod in - ps-style listings). - - Alternatively, you can put it right into the kernel: remove everything - from fs/nfs, move the Makefile and all *.c to this directory, and - copy all *.h files to include/linux. - - After mounting, you should be able to watch (with tcpdump) several - RPC READ calls being placed simultaneously. - - - HOW IT WORKS - - When a process reads from a file on an NFS volume, the following - happens: - - * nfs_file_read sets file->f_reada if more than 1K is - read at once. It then calls generic_file_read. 
- - * generic_file_read requests one ore more pages via - nfs_readpage. - - * nfs_readpage allocates a request slot with an nfsiod - daemon, fills in the READ request, sends out the - RPC call, kicks the daemon, and returns. - If there's no free biod, nfs_readpage places the - call directly, waiting for the reply (sync readpage). - - * nfsiod calls nfs_rpc_doio to collect the reply. If the - call was successful, it sets page->uptodate and - wakes up all processes waiting on page->wait; - - This is the rough outline only. There are a few things to note: - - * Async RPC will not be tried when server->rsize < PAGE_SIZE. - - * When an error occurs, nfsiod has no way of returning - the error code to the user process. Therefore, it flags - page->error and wakes up all processes waiting on that - page (they usually do so from within generic_readpage). - - generic_readpage finds that the page is still not - uptodate, and calls nfs_readpage again. This time around, - nfs_readpage notices that page->error is set and - unconditionally does a synchronous RPC call. - - This area needs a lot of improvement, since read errors - are not that uncommon (e.g. we have to retransmit calls - if the fsuid is different from the ruid in order to - cope with root squashing and stuff like this). - - Retransmits with fsuid/ruid change should be handled by - nfsiod, but this doesn't come easily (a more general nfs_call - routine that does all this may be useful...) - - * To save some time on readaheads, we save one data copy - by frobbing the page into the iovec passed to the - RPC code so that the networking layer copies the - data into the page directly. - - This needs to be adjustable (different authentication - flavors; AUTH_NULL versus AUTH_SHORT verifiers). - - * Currently, a fixed number of nfsiod's is spawned from - within init_nfs_fs. This is problematic when running - as a loadable module, because this will keep insmod's - memory allocated. 
As a side-effect, you will see the - nfsiod processes listed as several insmod's when doing - a `ps.' - - * This NFS client implements server congestion control via - Van Jacobson slow start as implemented in 44BSD. I haven't - checked how well this behaves, but since Rick Macklem did - it this way, it should be okay :-) - - - WISH LIST - - After giving this thing some testing, I'd like to add some more - features: - - * Some sort of async write handling. True write-back doesn't - work with the current kernel (I think), because invalidate_pages - kills all pages, regardless of whether they're dirty or not. - Besides, this may require special bdflush treatment because - write caching on clients is really hairy. - - Alternatively, a write-through scheme might be useful where - the client enqueues the request, but leaves collecting the - results to nfsiod. Again, we need a way to pass RPC errors - back to the application. - - * Support for different authentication flavors. - - * /proc/net/nfsclnt (for nfsstat, etc.). - -March 29, 1996 -Olaf Kirch <okir@monad.swb.de> diff --git a/fs/nfs/bio.c b/fs/nfs/bio.c deleted file mode 100644 index 178f8cc28..000000000 --- a/fs/nfs/bio.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * linux/fs/nfs/bio.c - * - * Block I/O for NFS - * - * Partial copy of Linus' read cache modifications to fs/nfs/file.c - * modified for async RPC by okir@monad.swb.de - * - * We do an ugly hack here in order to return proper error codes to the - * user program when a read request failed. This is a huge problem because - * generic_file_read only checks the return value of inode->i_op->readpage() - * which is usually 0 for async RPC. To overcome this obstacle, we set - * the error bit of the page to 1 when an error occurs, and make nfs_readpage - * transmit requests synchronously when encountering this. 
- * - * Another possible solution to this problem may be to have a cache of recent - * RPC call results indexed by page pointer, or even a result code field - * in struct page. - * - * June 96: Added retries of RPCs that seem to have failed for a transient - * reason. - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/mm.h> -#include <linux/nfs_fs.h> -#include <linux/nfsiod.h> -#include <linux/malloc.h> -#include <linux/pagemap.h> - -#include <asm/uaccess.h> -#include <asm/system.h> - -#undef DEBUG_BIO -#ifdef DEBUG_BIO -#define dprintk(args...) printk(## args) -#else -#define dprintk(args...) /* nothing */ -#endif - -static inline int -do_read_nfs_sync(struct inode * inode, struct page * page) -{ - struct nfs_fattr fattr; - int result, refresh = 0; - int count = PAGE_SIZE; - int rsize = NFS_SERVER(inode)->rsize; - char *buf = (char *) page_address(page); - unsigned long pos = page->offset; - - dprintk("NFS: do_read_nfs_sync(%p)\n", page); - - set_bit(PG_locked, &page->flags); - clear_bit(PG_error, &page->flags); - - do { - if (count < rsize) - rsize = count; - result = nfs_proc_read(NFS_SERVER(inode), NFS_FH(inode), - pos, rsize, buf, &fattr); - dprintk("nfs_proc_read(%s, (%x,%lx), %ld, %d, %p) = %d\n", - NFS_SERVER(inode)->hostname, - inode->i_dev, inode->i_ino, - pos, rsize, buf, result); - /* - * Even if we had a partial success we can't mark the page - * cache valid. 
- */ - if (result < 0) - goto io_error; - refresh = 1; - count -= result; - pos += result; - buf += result; - if (result < rsize) - break; - } while (count); - - memset(buf, 0, count); - set_bit(PG_uptodate, &page->flags); - result = 0; - -io_error: - if (refresh) - nfs_refresh_inode(inode, &fattr); - clear_bit(PG_locked, &page->flags); - wake_up(&page->wait); - return result; -} - -/* - * This is the function to (re-) transmit an NFS readahead request - */ -static int -nfsiod_read_setup(struct nfsiod_req *req) -{ - struct inode *inode = req->rq_inode; - struct page *page = req->rq_page; - - return nfs_proc_read_request(&req->rq_rpcreq, - NFS_SERVER(inode), NFS_FH(inode), - page->offset, PAGE_SIZE, - (__u32 *) page_address(page)); -} - -/* - * This is the callback from nfsiod telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). - */ -static int -nfsiod_read_result(int result, struct nfsiod_req *req) -{ - struct nfs_server *server = NFS_SERVER(req->rq_inode); - struct page *page = req->rq_page; - static int succ = 0, fail = 0; - int i; - - dprintk("BIO: received callback for page %p, result %d\n", - page, result); - - if (result >= 0) { - struct nfs_fattr fattr; - - result = nfs_proc_read_reply(&req->rq_rpcreq, &fattr); - if (result >= 0) { - nfs_refresh_inode(req->rq_inode, &fattr); - if (result < PAGE_SIZE) - memset((u8 *) page_address(page)+result, - 0, PAGE_SIZE-result); - } - } else - if (result == -ETIMEDOUT && !(server->flags & NFS_MOUNT_SOFT)) { - /* XXX: Theoretically, we'd have to increment the initial - * timeo here; but I'm not going to bother with this now - * because this old nfsiod stuff will soon die anyway. 
- */ - result = -EAGAIN; - } - - if (result == -EAGAIN && req->rq_retries--) { - dprintk("BIO: retransmitting request.\n"); - memset(&req->rq_rpcreq, 0, sizeof(struct rpc_ioreq)); - while (rpc_reserve(server->rsock, &req->rq_rpcreq, 1) < 0) - schedule(); - current->fsuid = req->rq_fsuid; - current->fsgid = req->rq_fsgid; - for (i = 0; i < NGROUPS; i++) - current->groups[i] = req->rq_groups[i]; - nfsiod_read_setup(req); - return 0; - } - if (result >= 0) { - set_bit(PG_uptodate, &page->flags); - succ++; - } else { - dprintk("BIO: %d successful reads, %d failures\n", succ, fail); - set_bit(PG_error, &page->flags); - fail++; - } - clear_bit(PG_locked, &page->flags); - wake_up(&page->wait); - free_page(page_address(page)); - return 1; -} - -static inline int -do_read_nfs_async(struct inode *inode, struct page *page) -{ - struct nfsiod_req *req; - int result, i; - - dprintk("NFS: do_read_nfs_async(%p)\n", page); - - set_bit(PG_locked, &page->flags); - clear_bit(PG_error, &page->flags); - - if (!(req = nfsiod_reserve(NFS_SERVER(inode)))) - return -EAGAIN; - - req->rq_retries = 5; - req->rq_callback = nfsiod_read_result; - req->rq_inode = inode; - req->rq_page = page; - - req->rq_fsuid = current->fsuid; - req->rq_fsgid = current->fsgid; - for (i = 0; i < NGROUPS; i++) - req->rq_groups[i] = current->groups[i]; - - if ((result = nfsiod_read_setup(req)) >= 0) { - page->count++; - nfsiod_enqueue(req); - } else { - dprintk("NFS: deferring async READ request.\n"); - nfsiod_release(req); - clear_bit(PG_locked, &page->flags); - wake_up(&page->wait); - } - - return result < 0? 
result : 0; -} - -int -nfs_readpage(struct inode *inode, struct page *page) -{ - unsigned long address; - int error = -1; - - dprintk("NFS: nfs_readpage %08lx\n", page_address(page)); - address = page_address(page); - page->count++; - if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_SIZE) - error = do_read_nfs_async(inode, page); - if (error < 0) /* couldn't enqueue */ - error = do_read_nfs_sync(inode, page); - free_page(address); - return error; -} diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8f16dc799..a11b9fb6a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -6,19 +6,38 @@ * nfs directory handling functions * * 10 Apr 1996 Added silly rename for unlink --okir + * 28 Sep 1996 Improved directory cache --okir */ #include <linux/sched.h> #include <linux/errno.h> #include <linux/stat.h> -#include <linux/nfs_fs.h> #include <linux/fcntl.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/malloc.h> #include <linux/mm.h> +#include <linux/sunrpc/types.h> +#include <linux/nfs_fs.h> -#include <asm/uaccess.h> /* for fs functions */ +#include <asm/segment.h> /* for fs functions */ + +/* + * Head for a dircache entry. Currently still very simple; when + * the cache grows larger, we will need a LRU list. 
+ */ +struct nfs_dirent { + dev_t dev; /* device number */ + ino_t ino; /* inode number */ + u32 cookie; /* cooke of first entry */ + unsigned short valid : 1, /* data is valid */ + locked : 1; /* entry locked */ + unsigned int size; /* # of entries */ + unsigned long age; /* last used */ + unsigned long mtime; /* last attr stamp */ + struct wait_queue * wait; + struct nfs_entry * entry; +}; static int nfs_dir_open(struct inode * inode, struct file * file); static long nfs_dir_read(struct inode *, struct file *, char *, unsigned long); @@ -64,42 +83,26 @@ struct inode_operations nfs_dir_inode_operations = { NULL, /* writepage */ NULL, /* bmap */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL, /* updatepage */ + nfs_revalidate, /* revalidate */ }; -static inline void revalidate_dir(struct nfs_server * server, struct inode * dir) +static int +nfs_dir_open(struct inode *dir, struct file *file) { - struct nfs_fattr fattr; - - if (jiffies - NFS_READTIME(dir) < NFS_ATTRTIMEO(dir)) - return; - - NFS_READTIME(dir) = jiffies; - if (nfs_proc_getattr(server, NFS_FH(dir), &fattr) == 0) { - nfs_refresh_inode(dir, &fattr); - if (fattr.mtime.seconds == NFS_OLDMTIME(dir)) { - if ((NFS_ATTRTIMEO(dir) <<= 1) > server->acdirmax) - NFS_ATTRTIMEO(dir) = server->acdirmax; - return; - } - NFS_OLDMTIME(dir) = fattr.mtime.seconds; - } - /* invalidate directory cache here when we _really_ start caching */ + dfprintk(VFS, "NFS: nfs_dir_open(%x/%ld)\n", dir->i_dev, dir->i_ino); + return nfs_revalidate_inode(NFS_SERVER(dir), dir); } -static int nfs_dir_open(struct inode * dir, struct file * file) -{ - revalidate_dir(NFS_SERVER(dir), dir); - return 0; -} - -static long nfs_dir_read(struct inode *inode, struct file *filp, - char *buf, unsigned long count) +static long +nfs_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count) { return -EISDIR; } -static struct nfs_entry *c_entry = NULL; +static struct nfs_dirent 
dircache[NFS_MAX_DIRCACHE]; /* * We need to do caching of directory entries to prevent an @@ -107,119 +110,222 @@ static struct nfs_entry *c_entry = NULL; * directory is cached. This seems sufficient for most purposes. * Technically, we ought to flush the cache on close but this is * not a problem in practice. + * + * XXX: Do proper directory caching by stuffing data into the + * page cache (may require some fiddling for rsize < PAGE_SIZE). */ -static int nfs_readdir(struct inode *inode, struct file *filp, - void *dirent, filldir_t filldir) +static int +nfs_readdir(struct inode *inode, struct file *filp, void *dirent, + filldir_t filldir) { - static kdev_t c_dev = 0; - static int c_ino; - static int c_size; - - int result; - int i, index = 0; - struct nfs_entry *entry; - + static struct wait_queue *readdir_wait = NULL; + struct wait_queue **waitp = NULL; + struct nfs_dirent *cache, *free; + struct nfs_entry *entry; + unsigned long age, dead; + u32 cookie; + int ismydir, result; + int i, j, index = 0; + + dfprintk(VFS, "NFS: nfs_readdir(%x/%ld)\n", inode->i_dev, inode->i_ino); if (!inode || !S_ISDIR(inode->i_mode)) { printk("nfs_readdir: inode is NULL or not a directory\n"); return -EBADF; } - revalidate_dir(NFS_SERVER(inode), inode); + if ((result = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0) + return result; + + /* + * Try to find the entry in the cache + */ +again: + if (waitp) { + interruptible_sleep_on(waitp); + if (current->signal & ~current->blocked) + return -ERESTARTSYS; + waitp = NULL; + } + + cookie = filp->f_pos; + entry = NULL; + free = NULL; + age = ~(unsigned long) 0; + dead = jiffies - NFS_ATTRTIMEO(inode); + + for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) { + /* + dprintk("NFS: dircache[%d] valid %d locked %d\n", + i, cache->valid, cache->locked); + */ + ismydir = (cache->dev == inode->i_dev + && cache->ino == inode->i_ino); + if (cache->locked) { + if (!ismydir || cache->cookie != cookie) + continue; + 
dfprintk(DIRCACHE, "NFS: waiting on dircache entry\n"); + waitp = &cache->wait; + goto again; + } - /* initialize cache memory if it hasn't been used before */ + if (ismydir && cache->mtime != NFS_OLDMTIME(inode)) + cache->valid = 0; - if (c_entry == NULL) { - i = sizeof (struct nfs_entry)*NFS_READDIR_CACHE_SIZE; - c_entry = (struct nfs_entry *) kmalloc(i, GFP_KERNEL); - if (c_entry == NULL) { - printk("nfs_readdir: no MEMORY for cache\n"); - return -ENOMEM; + if (!cache->valid || cache->age < dead) { + free = cache; + age = 0; + } else if (cache->age < age) { + free = cache; + age = cache->age; } - for (i = 0; i < NFS_READDIR_CACHE_SIZE; i++) { - c_entry[i].name = (char *) kmalloc(NFS_MAXNAMLEN + 1, - GFP_KERNEL); - if (c_entry[i].name == NULL) { - printk("nfs_readdir: no MEMORY for cache\n"); - while (--i>=0) - kfree(c_entry[i].name); - kfree(c_entry); - c_entry = NULL; - return -ENOMEM; - } - } - } - entry = NULL; - /* try to find it in the cache */ + if (!ismydir || !cache->valid) + continue; - if (inode->i_dev == c_dev && inode->i_ino == c_ino) { - for (i = 0; i < c_size; i++) { - if (filp->f_pos == c_entry[i].cookie) { - if (i == c_size - 1) { - if (c_entry[i].eof) - return 0; - } - else - entry = c_entry + (index = i + 1); - break; + if (cache->cookie == cookie && cache->size > 0) { + entry = cache->entry + (index = 0); + cache->locked = 1; + break; + } + for (j = 0; j < cache->size; j++) { + /* + dprintk("NFS: examing entry %.*s @%d\n", + (int) cache->entry[j].length, + cache->entry[j].name, + cache->entry[j].cookie); + */ + if (cache->entry[j].cookie != cookie) + continue; + if (j < cache->size - 1) { + entry = cache->entry + (index = j + 1); + } else if (cache->entry[j].eof) { + return 0; } + break; + } + if (entry) { + dfprintk(DIRCACHE, "NFS: found dircache entry %d\n", + (int)(cache - dircache)); + cache->locked = 1; + break; } } - /* if we didn't find it in the cache, revert to an nfs call */ - - if (!entry) { - result = 
nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(inode), - filp->f_pos, NFS_READDIR_CACHE_SIZE, c_entry); - if (result < 0) { - c_dev = 0; - return result; + /* + * Okay, entry not present in cache, or locked and inaccessible. + * Set up the cache entry and attempt a READDIR call. + */ + if (entry == NULL) { + if ((cache = free) == NULL) { + dfprintk(DIRCACHE, "NFS: dircache contention\n"); + waitp = &readdir_wait; + goto again; } - if (result > 0) { - c_dev = inode->i_dev; - c_ino = inode->i_ino; - c_size = result; - entry = c_entry + (index = 0); + dfprintk(DIRCACHE, "NFS: using free dircache entry %d\n", + (int)(free - dircache)); + cache->cookie = cookie; + cache->locked = 1; + cache->valid = 0; + cache->dev = inode->i_dev; + cache->ino = inode->i_ino; + if (!cache->entry) { + cache->entry = (struct nfs_entry *) + get_free_page(GFP_KERNEL); + if (!cache->entry) { + result = -ENOMEM; + goto done; + } } - } - /* if we found it in the cache or from an nfs call, return results */ - if (!entry) - return 0; - while (index < c_size) { - int nextpos = entry->cookie; - if (filldir(dirent, entry->name, strlen(entry->name), filp->f_pos, entry->fileid) < 0) - break; - filp->f_pos = nextpos; - /* revalidate the cache if we slept in filldir() */ - if (inode->i_dev != c_dev) - break; - if (inode->i_ino != c_ino) + result = nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(inode), + cookie, PAGE_SIZE, cache->entry); + if (result <= 0) + goto done; + cache->size = result; + cache->valid = 1; + entry = cache->entry + (index = 0); + } + cache->mtime = NFS_OLDMTIME(inode); + cache->age = jiffies; + + /* + * Yowza! We have a cache entry... 
+ */ + while (index < cache->size) { + int nextpos = entry->cookie; + + /* + dprintk("NFS: filldir(%p, %.*s, %d, %d, %x, eof %x)\n", entry, + (int) entry->length, entry->name, entry->length, + (unsigned int) filp->f_pos, + entry->fileid, entry->eof); + */ + + if (filldir(dirent, entry->name, entry->length, cookie, entry->fileid) < 0) break; - if (nextpos != entry->cookie) + cookie = nextpos; + if (nextpos != entry->cookie) { + printk("nfs_readdir: shouldn't happen!\n"); break; + } index++; entry++; } - return 0; + filp->f_pos = cookie; + result = 0; + + /* XXX: May want to kick async readdir-ahead here. Not too hard + * to do. */ + +done: + dfprintk(DIRCACHE, "NFS: nfs_readdir complete\n"); + cache->locked = 0; + wake_up(&cache->wait); + wake_up(&readdir_wait); + + return result; } /* - * free cache memory - * called from cleanup_module + * Invalidate dircache entries for inode */ - -void nfs_kfree_cache(void) +void +nfs_invalidate_dircache(struct inode *inode) { - int i; + struct nfs_dirent *cache; + dev_t dev = inode->i_dev; + ino_t ino = inode->i_ino; + int i; - if (c_entry == NULL) - return; - for (i = 0; i < NFS_READDIR_CACHE_SIZE; i++) - kfree(c_entry[i].name); - kfree(c_entry); - c_entry = NULL; + dfprintk(DIRCACHE, "NFS: invalidate dircache for %x/%ld\n", dev, (long)ino); + for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) { + if (!cache->locked && cache->dev == dev && cache->ino == ino) + cache->valid = 0; /* brute force */ + } +} + +/* + * Free directory cache memory + * Called from cleanup_module + */ +void +nfs_free_dircache(void) +{ + struct nfs_dirent *cache; + int i; + + dfprintk(DIRCACHE, "NFS: freeing dircache\n"); + for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) { + cache->valid = 0; + if (cache->locked) { + printk("nfs_kfree_cache: locked entry in dircache!\n"); + continue; + } + if (cache->entry) + free_page((unsigned long) cache->entry); + cache->entry = NULL; + } } @@ -234,15 +340,18 @@ void 
nfs_kfree_cache(void) * Since the cache is not hashed yet, it is a good idea not to make it too * large because every lookup looks through the entire cache even * though most of them will fail. + * + * FIXME: The lookup cache should also cache failed lookups. This can + * be a considerable win on diskless clients. */ static struct nfs_lookup_cache_entry { - kdev_t dev; - int inode; - char filename[NFS_MAXNAMLEN + 1]; - struct nfs_fh fhandle; + kdev_t dev; + ino_t inode; + char filename[NFS_MAXNAMLEN + 1]; + struct nfs_fh fhandle; struct nfs_fattr fattr; - int expiration_date; + int expiration_date; } nfs_lookup_cache[NFS_LOOKUP_CACHE_SIZE]; static struct nfs_lookup_cache_entry *nfs_lookup_cache_index(struct inode *dir, @@ -269,6 +378,8 @@ static int nfs_lookup_cache_lookup(struct inode *dir, const char *filename, struct nfs_lookup_cache_entry *entry; + dfprintk(LOOKUPCACHE, "NFS: lookup_cache_lookup(%x/%ld, %s)\n", + dir->i_dev, dir->i_ino, filename); if (!nfs_lookup_cache_in_use) { memset(nfs_lookup_cache, 0, sizeof(nfs_lookup_cache)); nfs_lookup_cache_in_use = 1; @@ -292,6 +403,9 @@ static void nfs_lookup_cache_add(struct inode *dir, const char *filename, static int nfs_lookup_cache_pos = 0; struct nfs_lookup_cache_entry *entry; + dfprintk(LOOKUPCACHE, "NFS: lookup_cache_add(%x/%ld, %s\n", + dir->i_dev, dir->i_ino, filename); + /* compensate for bug in SGI NFS server */ if (fattr->size == -1 || fattr->uid == -1 || fattr->gid == -1 || fattr->atime.seconds == -1 || fattr->mtime.seconds == -1) @@ -301,6 +415,7 @@ static void nfs_lookup_cache_add(struct inode *dir, const char *filename, if (nfs_lookup_cache_pos == NFS_LOOKUP_CACHE_SIZE) nfs_lookup_cache_pos = 0; } + entry->dev = dir->i_dev; entry->inode = dir->i_ino; strcpy(entry->filename, filename); @@ -314,9 +429,9 @@ static void nfs_lookup_cache_remove(struct inode *dir, struct inode *inode, const char *filename) { struct nfs_lookup_cache_entry *entry; - kdev_t dev; - int fileid; - int i; + kdev_t dev; + ino_t 
fileid; + int i; if (inode) { dev = inode->i_dev; @@ -328,6 +443,10 @@ static void nfs_lookup_cache_remove(struct inode *dir, struct inode *inode, } else return; + + dfprintk(LOOKUPCACHE, "NFS: lookup_cache_remove(%x/%ld)\n", + dev, (long)fileid); + for (i = 0; i < NFS_LOOKUP_CACHE_SIZE; i++) { entry = nfs_lookup_cache + i; if (entry->dev == dev && entry->fattr.fileid == fileid) @@ -358,6 +477,9 @@ static int nfs_lookup(struct inode *dir, const char *__name, int len, char name[len > NFS_MAXNAMLEN? 1 : len+1]; int error; + dfprintk(VFS, "NFS: lookup(%x/%ld, %.*s)\n", + dir->i_dev, dir->i_ino, len, __name); + *result = NULL; if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_lookup: inode is NULL or not a directory\n"); @@ -399,6 +521,9 @@ static int nfs_create(struct inode *dir, const char *name, int len, int mode, struct nfs_fh fhandle; int error; + dfprintk(VFS, "NFS: create(%x/%ld, %s\n", + dir->i_dev, dir->i_ino, name); + *result = NULL; if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_create: inode is NULL or not a directory\n"); @@ -422,6 +547,7 @@ static int nfs_create(struct inode *dir, const char *name, int len, int mode, return -EACCES; } nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + nfs_invalidate_dircache(dir); iput(dir); return 0; } @@ -434,6 +560,9 @@ static int nfs_mknod(struct inode *dir, const char *name, int len, struct nfs_fh fhandle; int error; + dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n", + dir->i_dev, dir->i_ino, name); + if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_mknod: inode is NULL or not a directory\n"); iput(dir); @@ -453,12 +582,8 @@ static int nfs_mknod(struct inode *dir, const char *name, int len, error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir), name, &sattr, &fhandle, &fattr); if (!error) - { nfs_lookup_cache_add(dir, name, &fhandle, &fattr); - /* The parent dir inode count may have changed ! 
*/ - nfs_lookup_cache_remove( NULL, dir, NULL); - } - + nfs_invalidate_dircache(dir); iput(dir); return error; } @@ -470,6 +595,9 @@ static int nfs_mkdir(struct inode *dir, const char *name, int len, int mode) struct nfs_fh fhandle; int error; + dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n", + dir->i_dev, dir->i_ino, name); + if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_mkdir: inode is NULL or not a directory\n"); iput(dir); @@ -484,12 +612,9 @@ static int nfs_mkdir(struct inode *dir, const char *name, int len, int mode) sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir), name, &sattr, &fhandle, &fattr); - if (!error) { - if (fattr.fileid == dir->i_ino) - printk("Sony NewsOS 4.1R buggy nfs server?\n"); - else - nfs_lookup_cache_add(dir, name, &fhandle, &fattr); - } + if (!error) + nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + nfs_invalidate_dircache(dir); iput(dir); return error; } @@ -498,6 +623,9 @@ static int nfs_rmdir(struct inode *dir, const char *name, int len) { int error; + dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n", + dir->i_dev, dir->i_ino, name); + if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_rmdir: inode is NULL or not a directory\n"); iput(dir); @@ -508,7 +636,9 @@ static int nfs_rmdir(struct inode *dir, const char *name, int len) return -ENAMETOOLONG; } error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dir), name); - nfs_lookup_cache_remove(dir, NULL, name); + if (!error) + nfs_lookup_cache_remove(dir, NULL, name); + nfs_invalidate_dircache(dir); iput(dir); return error; } @@ -522,46 +652,78 @@ static int nfs_sillyrename(struct inode *dir, const char *name, int len) dir->i_count++; if (nfs_lookup(dir, name, len, &inode) < 0) return -EIO; /* arbitrary */ - if (inode->i_count == 1 || NFS_RENAMED_DIR(inode)) { + + if (inode->i_count == 1) { + iput(inode); + return -EIO; + } + if (NFS_RENAMED_DIR(inode)) { + iput(NFS_RENAMED_DIR(inode)); + NFS_RENAMED_DIR(inode) = NULL; iput(inode); return 
-EIO; } - slen = sprintf(silly, ".nfs%ld", inode->i_ino); + slen = sprintf(silly, ".nfs%ld", inode->i_ino); if (len == slen && !strncmp(name, silly, len)) { iput(inode); return -EIO; /* DWIM */ } + + dfprintk(VFS, "NFS: sillyrename(%x/%ld, %s)\n", + dir->i_dev, dir->i_ino, name); + ret = nfs_proc_rename(NFS_SERVER(dir), NFS_FH(dir), name, - NFS_FH(dir), silly, 0); + NFS_FH(dir), silly); if (ret >= 0) { nfs_lookup_cache_remove(dir, NULL, name); nfs_lookup_cache_remove(dir, NULL, silly); NFS_RENAMED_DIR(inode) = dir; dir->i_count++; } + nfs_invalidate_dircache(dir); iput(inode); return ret; } +/* + * When releasing the inode, finally remove any unlinked but open files. + * Note that we have to clear the set of pending signals temporarily; + * otherwise the RPC call will fail. + */ void nfs_sillyrename_cleanup(struct inode *inode) { + unsigned long oldsig; struct inode *dir = NFS_RENAMED_DIR(inode); char silly[14]; int error, slen; + dfprintk(VFS, "NFS: sillyrename cleanup(%x/%ld)\n", + inode->i_dev, inode->i_ino); + + oldsig = current->signal; + current->signal = 0; + slen = sprintf(silly, ".nfs%ld", inode->i_ino); - if ((error = nfs_unlink(dir, silly, slen)) < 0) { - printk("NFS silly_rename cleanup failed (err = %d)\n", - -error); - } + error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dir), silly); + if (error < 0) + printk("NFS: silly_rename cleanup failed (err %d)\n", -error); + + nfs_lookup_cache_remove(dir, NULL, silly); + nfs_invalidate_dircache(dir); NFS_RENAMED_DIR(inode) = NULL; + iput(dir); + + current->signal |= oldsig; } static int nfs_unlink(struct inode *dir, const char *name, int len) { int error; + dfprintk(VFS, "NFS: unlink(%x/%ld, %s)\n", + dir->i_dev, dir->i_ino, name); + if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_unlink: inode is NULL or not a directory\n"); iput(dir); @@ -573,8 +735,10 @@ static int nfs_unlink(struct inode *dir, const char *name, int len) } if ((error = nfs_sillyrename(dir, name, len)) < 0) { error = 
nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dir), name); - nfs_lookup_cache_remove(dir, NULL, name); + if (!error) + nfs_lookup_cache_remove(dir, NULL, name); } + nfs_invalidate_dircache(dir); iput(dir); return error; } @@ -585,6 +749,9 @@ static int nfs_symlink(struct inode *dir, const char *name, int len, struct nfs_sattr sattr; int error; + dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n", + dir->i_dev, dir->i_ino, name, symname); + if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_symlink: inode is NULL or not a directory\n"); iput(dir); @@ -603,6 +770,7 @@ static int nfs_symlink(struct inode *dir, const char *name, int len, sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir), name, symname, &sattr); + nfs_invalidate_dircache(dir); iput(dir); return error; } @@ -612,6 +780,10 @@ static int nfs_link(struct inode *oldinode, struct inode *dir, { int error; + dfprintk(VFS, "NFS: link(%x/%ld -> %x/%ld, %s)\n", + oldinode->i_dev, oldinode->i_ino, + dir->i_dev, dir->i_ino, name); + if (!oldinode) { printk("nfs_link: old inode is NULL\n"); iput(oldinode); @@ -631,19 +803,35 @@ static int nfs_link(struct inode *oldinode, struct inode *dir, } error = nfs_proc_link(NFS_SERVER(oldinode), NFS_FH(oldinode), NFS_FH(dir), name); - - nfs_lookup_cache_remove(dir, oldinode, NULL); + if (!error) { + nfs_lookup_cache_remove(dir, oldinode, NULL); + NFS_READTIME(oldinode) = 0; /* force getattr */ + } + nfs_invalidate_dircache(dir); iput(oldinode); iput(dir); return error; } +/* + * RENAME + * FIXME: Some nfsds, like the Linux user space nfsd, may generate a + * different file handle for the same inode after a rename (e.g. when + * moving to a different directory). A fail-safe method to do so would + * be to look up old_dir/old_name, create a link to new_dir/new_name and + * rename the old file using the silly_rename stuff. This way, the original + * file in old_dir will go away when the last process iput()s the inode. 
+ */ static int nfs_rename(struct inode *old_dir, const char *old_name, int old_len, struct inode *new_dir, const char *new_name, int new_len, int must_be_dir) { int error; + dfprintk(VFS, "NFS: rename(%x/%ld, %s -> %x/%ld, %s)\n", + old_dir->i_dev, old_dir->i_ino, old_name, + new_dir->i_dev, new_dir->i_ino, new_name); + if (!old_dir || !S_ISDIR(old_dir->i_mode)) { printk("nfs_rename: old inode is NULL or not a directory\n"); iput(old_dir); @@ -661,13 +849,20 @@ static int nfs_rename(struct inode *old_dir, const char *old_name, int old_len, iput(new_dir); return -ENAMETOOLONG; } + + /* We don't do rename() with trailing slashes over NFS now. Hmm. */ + if (must_be_dir) + return -EINVAL; + error = nfs_proc_rename(NFS_SERVER(old_dir), NFS_FH(old_dir), old_name, - NFS_FH(new_dir), new_name, - must_be_dir); - - nfs_lookup_cache_remove(old_dir, NULL, old_name); - nfs_lookup_cache_remove(new_dir, NULL, new_name); + NFS_FH(new_dir), new_name); + if (!error) { + nfs_lookup_cache_remove(old_dir, NULL, old_name); + nfs_lookup_cache_remove(new_dir, NULL, new_name); + } + nfs_invalidate_dircache(old_dir); + nfs_invalidate_dircache(new_dir); iput(old_dir); iput(new_dir); return error; @@ -683,6 +878,9 @@ void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { int was_empty; + dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n", + inode->i_dev, inode->i_ino, inode->i_count); + if (!inode || !fattr) { printk("nfs_refresh_inode: inode or fattr is NULL\n"); return; @@ -698,10 +896,16 @@ void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_gid = fattr->gid; /* Size changed from outside: invalidate caches on next read */ - if (inode->i_size != fattr->size) + if (inode->i_size != fattr->size) { + dfprintk(PAGECACHE, "NFS: cacheinv(%x/%ld)\n", + inode->i_dev, inode->i_ino); NFS_CACHEINV(inode); - if (NFS_OLDMTIME(inode) != fattr->mtime.seconds) + } + if (NFS_OLDMTIME(inode) != fattr->mtime.seconds) { + dfprintk(PAGECACHE, "NFS: mtime change on 
%x/%ld\n", + inode->i_dev, inode->i_ino); NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + } inode->i_size = fattr->size; if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) inode->i_rdev = to_kdev_t(fattr->rdev); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5ec19b0c2..ca42719bd 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -21,18 +21,24 @@ #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/stat.h> -#include <linux/mm.h> #include <linux/nfs_fs.h> +#include <linux/mm.h> #include <linux/malloc.h> #include <linux/pagemap.h> +#include <linux/lockd/bind.h> -#include <asm/uaccess.h> +#include <asm/segment.h> #include <asm/system.h> -static int nfs_file_mmap(struct inode *, struct file *, struct vm_area_struct *); +#define NFSDBG_FACILITY NFSDBG_FILE + +static int nfs_file_mmap(struct inode *, struct file *, + struct vm_area_struct *); static long nfs_file_read(struct inode *, struct file *, char *, unsigned long); -static long nfs_file_write(struct inode *, struct file *, const char *, unsigned long); -static int nfs_fsync(struct inode *, struct file *); +static long nfs_file_write(struct inode *, struct file *, + const char *, unsigned long); +static int nfs_file_close(struct inode *, struct file *); +static int nfs_fsync(struct inode *, struct file *); static struct file_operations nfs_file_operations = { NULL, /* lseek - default */ @@ -43,8 +49,12 @@ static struct file_operations nfs_file_operations = { NULL, /* ioctl - default */ nfs_file_mmap, /* mmap */ NULL, /* no special open is needed */ - NULL, /* release */ + nfs_file_close, /* release */ nfs_fsync, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL, /* revalidate */ + nfs_lock, /* lock */ }; struct inode_operations nfs_file_inode_operations = { @@ -61,97 +71,156 @@ struct inode_operations nfs_file_inode_operations = { NULL, /* readlink */ NULL, /* follow_link */ nfs_readpage, /* readpage */ - NULL, /* writepage */ + nfs_writepage, /* writepage */ NULL, /* bmap 
*/ - NULL /* truncate */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + nfs_updatepage, /* updatepage */ + nfs_revalidate, /* revalidate */ }; -static inline void revalidate_inode(struct nfs_server * server, struct inode * inode) +/* Hack for future NFS swap support */ +#ifndef IS_SWAPFILE +# define IS_SWAPFILE(inode) (0) +#endif + + +static int +nfs_file_close(struct inode *inode, struct file *file) { - struct nfs_fattr fattr; - - if (jiffies - NFS_READTIME(inode) < NFS_ATTRTIMEO(inode)) - return; - - NFS_READTIME(inode) = jiffies; - if (nfs_proc_getattr(server, NFS_FH(inode), &fattr) == 0) { - nfs_refresh_inode(inode, &fattr); - if (fattr.mtime.seconds == NFS_OLDMTIME(inode)) { - if ((NFS_ATTRTIMEO(inode) <<= 1) > server->acregmax) - NFS_ATTRTIMEO(inode) = server->acregmax; - return; - } - NFS_OLDMTIME(inode) = fattr.mtime.seconds; - } - invalidate_inode_pages(inode); -} + int status; + dfprintk(VFS, "nfs: close(%x/%ld)\n", inode->i_dev, inode->i_ino); -static long nfs_file_read(struct inode * inode, struct file * file, - char * buf, unsigned long count) + if ((status = nfs_flush_dirty_pages(inode, 0, 0)) < 0) + return status; + return nfs_write_error(inode); +} + +static long +nfs_file_read(struct inode * inode, struct file * file, + char * buf, unsigned long count) { - revalidate_inode(NFS_SERVER(inode), inode); + int status; + + dfprintk(VFS, "nfs: read(%x/%ld, %lu@%lu)\n", + inode->i_dev, inode->i_ino, count, + (unsigned long) file->f_pos); + + if ((status = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0) + return status; return generic_file_read(inode, file, buf, count); } -static int nfs_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma) +static int +nfs_file_mmap(struct inode * inode, struct file * file, + struct vm_area_struct * vma) { - revalidate_inode(NFS_SERVER(inode), inode); + int status; + + dfprintk(VFS, "nfs: mmap(%x/%ld)\n", inode->i_dev, inode->i_ino); + + if ((status = 
nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0) + return status; return generic_file_mmap(inode, file, vma); } static int nfs_fsync(struct inode *inode, struct file *file) { - return 0; + dfprintk(VFS, "nfs: fsync(%x/%ld)\n", inode->i_dev, inode->i_ino); + + return nfs_flush_dirty_pages(inode, 0, 0); } -static long nfs_file_write(struct inode *inode, struct file *file, - const char *buf, unsigned long count) +/* + * Write to a file (through the page cache). + */ +static long +nfs_file_write(struct inode *inode, struct file *file, + const char *buf, unsigned long count) { - int result, written, wsize; - struct nfs_fattr fattr; - unsigned long pos; + int result; + + dfprintk(VFS, "nfs: write(%x/%ld (%d), %lu@%lu)\n", + inode->i_dev, inode->i_ino, inode->i_count, + count, (unsigned long) file->f_pos); if (!inode) { printk("nfs_file_write: inode = NULL\n"); return -EINVAL; } + if (IS_SWAPFILE(inode)) { + printk("NFS: attempt to write to active swap file!\n"); + return -EBUSY; + } + if ((result = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0) + return result; if (!S_ISREG(inode->i_mode)) { printk("nfs_file_write: write to non-file, mode %07o\n", - (unsigned int) inode->i_mode); + inode->i_mode); return -EINVAL; } - if (count == 0) + if (count <= 0) return 0; - pos = file->f_pos; - if (file->f_flags & O_APPEND) - pos = inode->i_size; - wsize = NFS_SERVER(inode)->wsize; - result = 0; - written = 0; - while (written < count) { - int hunk = count - written; - if (hunk >= wsize) - hunk = wsize; - result = nfs_proc_write(inode, - pos, hunk, buf, &fattr); - if (result < 0) - break; - pos += hunk; - buf += hunk; - written += hunk; - if (hunk < wsize) - break; - } - if (!written) + /* Return error from previous async call */ + if ((result = nfs_write_error(inode)) < 0) return result; - file->f_pos = pos; - if (pos > inode->i_size) - inode->i_size = pos; - /* Avoid possible Solaris 2.5 nfsd bug */ - if (inode->i_ino == fattr.fileid) - nfs_refresh_inode(inode, &fattr); 
- return written; + + return generic_file_write(inode, file, buf, count); } +/* + * Lock a (portion of) a file + */ +int +nfs_lock(struct inode *inode, struct file *filp, int cmd, struct file_lock *fl) +{ + int status; + + dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%ld:%ld)\n", + filp->f_inode->i_dev, filp->f_inode->i_ino, + fl->fl_type, fl->fl_flags, + fl->fl_start, fl->fl_end); + + if (!(inode = filp->f_inode)) + return -EINVAL; + + /* No mandatory locks over NFS */ + if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + /* Fake OK code if mounted without NLM support */ + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + return 0; + + /* + * No BSD flocks over NFS allowed. + * Note: we could try to fake a POSIX lock request here by + * using ((u32) filp | 0x80000000) or some such as the pid. + * Not sure whether that would be unique, though, or whether + * that would break in other places. + */ + if (!fl->fl_owner || (fl->fl_flags & (FL_POSIX|FL_BROKEN)) != FL_POSIX) + return -ENOLCK; + + /* If unlocking a file region, flush dirty pages (unless we've + * been killed by a signal, that is). */ + if (cmd == F_SETLK && fl->fl_type == F_UNLCK + && !(current->signal & ~current->blocked)) { + status = nfs_flush_dirty_pages(inode, + fl->fl_start, fl->fl_end == NLM_OFFSET_MAX? 
0 : + fl->fl_end - fl->fl_start + 1); + if (status < 0) + return status; + } + + if ((status = nlmclnt_proc(inode, cmd, fl)) < 0) + return status; + + /* Here, we could turn off write-back of pages in the + * locked file region */ + + return 0; +} diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e47227011..7f883270a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -13,28 +13,26 @@ * */ +#include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> -#include <linux/nfs_fs.h> -#include <linux/nfsiod.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> #include <linux/locks.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/stats.h> +#include <linux/nfs_fs.h> +#include <linux/lockd/bind.h> #include <asm/system.h> #include <asm/uaccess.h> -/* This is for kernel_thread */ -#define __KERNEL_SYSCALLS__ -#include <linux/unistd.h> - -extern int close_fp(struct file *filp); +#define NFSDBG_FACILITY NFSDBG_VFS static int nfs_notify_change(struct inode *, struct iattr *); static void nfs_put_inode(struct inode *); @@ -53,6 +51,8 @@ static struct super_operations nfs_sops = { NULL }; +struct rpc_stat nfs_rpcstat = { &nfs_program }; + /* * The "read_inode" function doesn't actually do anything: * the real data is filled in later in nfs_fhget. 
Here we @@ -60,31 +60,38 @@ static struct super_operations nfs_sops = { * (the latter makes "nfs_refresh_inode" do the right thing * wrt pipe inodes) */ -static void nfs_read_inode(struct inode * inode) +static void +nfs_read_inode(struct inode * inode) { - int rsize = inode->i_sb->u.nfs_sb.s_server.rsize; - int size = inode->i_sb->u.nfs_sb.s_server.wsize; - - if (rsize > size) - size = rsize; - inode->i_blksize = size; + inode->i_blksize = inode->i_sb->s_blocksize; inode->i_mode = 0; inode->i_op = NULL; NFS_CACHEINV(inode); } -static void nfs_put_inode(struct inode * inode) +static void +nfs_put_inode(struct inode * inode) { + dprintk("NFS: put_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); + if (NFS_RENAMED_DIR(inode)) nfs_sillyrename_cleanup(inode); if (inode->i_pipe) clear_inode(inode); } -void nfs_put_super(struct super_block *sb) +void +nfs_put_super(struct super_block *sb) { - close_fp(sb->u.nfs_sb.s_server.file); - rpc_closesock(sb->u.nfs_sb.s_server.rsock); + struct nfs_server *server = &sb->u.nfs_sb.s_server; + struct rpc_clnt *rpc; + + if ((rpc = server->client) != NULL) + rpc_shutdown_client(rpc); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ + rpciod_down(); /* release rpciod */ lock_super(sb); sb->s_dev = 0; unlock_super(sb); @@ -92,22 +99,50 @@ void nfs_put_super(struct super_block *sb) } /* - * The way this works is that the mount process passes a structure - * in the data argument which contains an open socket to the NFS - * server and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. - * Later we can add other mount parameters like caching values. 
+ * Compute and set NFS server blocksize */ - -struct super_block *nfs_read_super(struct super_block *sb, void *raw_data, - int silent) +static unsigned int +nfs_block_size(unsigned int bsize, unsigned char *nrbitsp) { - struct nfs_mount_data *data = (struct nfs_mount_data *) raw_data; - struct nfs_server *server; - unsigned int fd; - struct file *filp; + if (bsize < 1024) + bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) + bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + + /* make sure blocksize is a power of two */ + if ((bsize & (bsize - 1)) || nrbitsp) { + unsigned int nrbits; + + for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + ; + bsize = 1 << nrbits; + if (nrbitsp) + *nrbitsp = nrbits; + if (bsize < NFS_DEF_FILE_IO_BUFFER_SIZE) + bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + } - kdev_t dev = sb->s_dev; + return bsize; +} + +/* + * The way this works is that the mount process passes a structure + * in the data argument which contains the server's IP address + * and the root file handle obtained from the server's mount + * daemon. We stash these away in the private superblock fields. + */ +struct super_block * +nfs_read_super(struct super_block *sb, void *raw_data, int silent) +{ + struct nfs_mount_data *data = (struct nfs_mount_data *) raw_data; + struct sockaddr_in srvaddr; + struct nfs_server *server; + struct rpc_timeout timeparms; + struct rpc_xprt *xprt; + struct rpc_clnt *clnt; + unsigned int authflavor; + int tcp; + kdev_t dev = sb->s_dev; MOD_INC_USE_COUNT; if (!data) { @@ -116,97 +151,107 @@ struct super_block *nfs_read_super(struct super_block *sb, void *raw_data, MOD_DEC_USE_COUNT; return NULL; } - fd = data->fd; if (data->version != NFS_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", data->version < NFS_MOUNT_VERSION ? 
"older" : "newer"); + if (data->version < 2) + data->namlen = 0; + if (data->version < 3) + data->bsize = 0; } - if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) { - printk("nfs_read_super: invalid file descriptor\n"); - sb->s_dev = 0; - MOD_DEC_USE_COUNT; - return NULL; - } - if (!S_ISSOCK(filp->f_inode->i_mode)) { - printk("nfs_read_super: not a socket\n"); - sb->s_dev = 0; - MOD_DEC_USE_COUNT; - return NULL; - } - filp->f_count++; + lock_super(sb); - sb->s_blocksize = 1024; /* XXX */ - sb->s_blocksize_bits = 10; - sb->s_magic = NFS_SUPER_MAGIC; - sb->s_dev = dev; - sb->s_op = &nfs_sops; - server = &sb->u.nfs_sb.s_server; - server->file = filp; - server->lock = 0; - server->wait = NULL; - server->flags = data->flags; - server->rsize = data->rsize; - if (server->rsize <= 0) - server->rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (server->rsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - server->rsize = NFS_MAX_FILE_IO_BUFFER_SIZE; - server->wsize = data->wsize; - if (server->wsize <= 0) - server->wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (server->wsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - server->wsize = NFS_MAX_FILE_IO_BUFFER_SIZE; - server->timeo = data->timeo*HZ/10; - server->retrans = data->retrans; + server = &sb->u.nfs_sb.s_server; + sb->s_magic = NFS_SUPER_MAGIC; + sb->s_dev = dev; + sb->s_op = &nfs_sops; + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + server->rsize = nfs_block_size(data->rsize, NULL); + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags = data->flags; server->acregmin = data->acregmin*HZ; server->acregmax = data->acregmax*HZ; server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; strcpy(server->hostname, data->hostname); + sb->u.nfs_sb.s_root = data->root; + + /* We now require that the mount process passes the remote address */ + memcpy(&srvaddr, &data->addr, sizeof(srvaddr)); + if (srvaddr.sin_addr.s_addr == INADDR_ANY) { + printk("NFS: mount program didn't pass remote 
address!\n"); + MOD_DEC_USE_COUNT; + return NULL; + } - /* Start of JSP NFS patch */ - /* Check if passed address in data->addr */ - if (data->addr.sin_addr.s_addr == INADDR_ANY) { /* No address passed */ - if (((struct sockaddr_in *)(&server->toaddr))->sin_addr.s_addr == INADDR_ANY) { - printk("NFS: Error passed unconnected socket and no address\n") ; - MOD_DEC_USE_COUNT; - return NULL ; - } else { - /* Need access to socket internals JSP */ - struct socket *sock; - int dummylen ; - - /* printk("NFS: using socket address\n") ;*/ - - sock = &((filp->f_inode)->u.socket_i); - - /* extract the other end of the socket into server->toaddr */ - sock->ops->getname(sock, &(server->toaddr), &dummylen, 1) ; - } + /* Which protocol do we use? */ + tcp = (data->flags & NFS_MOUNT_TCP); + + /* Initialize timeout values */ + timeparms.to_initval = data->timeo * HZ / 10; + timeparms.to_retries = data->retrans; + timeparms.to_maxval = tcp? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT; + timeparms.to_exponential = 1; + + /* Choose authentication flavor */ + if (data->flags & NFS_MOUNT_SECURE) { + authflavor = RPC_AUTH_DES; + } else if (data->flags & NFS_MOUNT_KERBEROS) { + authflavor = RPC_AUTH_KRB; } else { - /* printk("NFS: copying passed addr to server->toaddr\n") ;*/ - memcpy((char *)&(server->toaddr),(char *)(&data->addr),sizeof(server->toaddr)); + authflavor = RPC_AUTH_UNIX; } - /* End of JSP NFS patch */ - if ((server->rsock = rpc_makesock(filp)) == NULL) { - printk("NFS: cannot create RPC socket.\n"); - MOD_DEC_USE_COUNT; - return NULL; + /* Now create transport and client */ + xprt = xprt_create_proto(tcp? 
IPPROTO_TCP : IPPROTO_UDP, + &srvaddr, &timeparms); + if (xprt == NULL) { + printk("NFS: cannot create RPC transport.\n"); + goto failure; } - sb->u.nfs_sb.s_root = data->root; + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + NFS_VERSION, authflavor); + if (clnt == NULL) { + printk("NFS: cannot create RPC client.\n"); + xprt_destroy(xprt); + goto failure; + } + + clnt->cl_intr = (data->flags & NFS_MOUNT_INTR)? 1 : 0; + clnt->cl_softrtry = (data->flags & NFS_MOUNT_SOFT)? 1 : 0; + clnt->cl_chatty = 1; + server->client = clnt; + + /* Fire up rpciod if not yet running */ + rpciod_up(); + + /* Unlock super block and try to get root fh attributes */ unlock_super(sb); - if (!(sb->s_mounted = nfs_fhget(sb, &data->root, NULL))) { - sb->s_dev = 0; - printk("nfs_read_super: get root inode failed\n"); - MOD_DEC_USE_COUNT; - return NULL; + + if ((sb->s_mounted = nfs_fhget(sb, &data->root, NULL)) != NULL) { + /* We're airborne */ + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + return sb; } - return sb; + + /* Yargs. It didn't work out. */ + printk("nfs_read_super: get root inode failed\n"); + rpc_shutdown_client(server->client); + rpciod_down(); + +failure: + MOD_DEC_USE_COUNT; + if (sb->s_lock) + unlock_super(sb); + sb->s_dev = 0; + return NULL; } -void nfs_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) +static void +nfs_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) { int error; struct nfs_fsinfo res; @@ -237,9 +282,9 @@ void nfs_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) * We just have to be careful not to subvert iget's special handling * of mount points. 
*/ - -struct inode *nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +struct inode * +nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) { struct nfs_fattr newfattr; int error; @@ -270,39 +315,43 @@ struct inode *nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle, *NFS_FH(inode) = *fhandle; nfs_refresh_inode(inode, fattr); } + dprintk("NFS: fhget(%x/%ld ct=%d)\n", + inode->i_dev, inode->i_ino, inode->i_count); + return inode; } -int nfs_notify_change(struct inode *inode, struct iattr *attr) +int +nfs_notify_change(struct inode *inode, struct iattr *attr) { struct nfs_sattr sattr; struct nfs_fattr fattr; int error; - sattr.mode = (unsigned) -1; + sattr.mode = (u32) -1; if (attr->ia_valid & ATTR_MODE) sattr.mode = attr->ia_mode; - sattr.uid = (unsigned) -1; + sattr.uid = (u32) -1; if (attr->ia_valid & ATTR_UID) sattr.uid = attr->ia_uid; - sattr.gid = (unsigned) -1; + sattr.gid = (u32) -1; if (attr->ia_valid & ATTR_GID) sattr.gid = attr->ia_gid; - sattr.size = (unsigned) -1; - if (attr->ia_valid & ATTR_SIZE) - sattr.size = S_ISREG(inode->i_mode) ? 
attr->ia_size : -1; + sattr.size = (u32) -1; + if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) + sattr.size = attr->ia_size; - sattr.mtime.seconds = sattr.mtime.useconds = (unsigned) -1; + sattr.mtime.seconds = sattr.mtime.useconds = (u32) -1; if (attr->ia_valid & ATTR_MTIME) { sattr.mtime.seconds = attr->ia_mtime; sattr.mtime.useconds = 0; } - sattr.atime.seconds = sattr.atime.useconds = (unsigned) -1; + sattr.atime.seconds = sattr.atime.useconds = (u32) -1; if (attr->ia_valid & ATTR_ATIME) { sattr.atime.seconds = attr->ia_atime; sattr.atime.useconds = 0; @@ -310,69 +359,117 @@ int nfs_notify_change(struct inode *inode, struct iattr *attr) error = nfs_proc_setattr(NFS_SERVER(inode), NFS_FH(inode), &sattr, &fattr); - if (!error) + if (!error) { + nfs_truncate_dirty_pages(inode, sattr.size); nfs_refresh_inode(inode, &fattr); + } inode->i_dirt = 0; return error; } -/* Every kernel module contains stuff like this. */ - -static struct file_system_type nfs_fs_type = { - nfs_read_super, "nfs", 0, NULL -}; +/* + * Externally visible revalidation function + */ +int +nfs_revalidate(struct inode *inode) +{ + return nfs_revalidate_inode(NFS_SERVER(inode), inode); +} /* - * Start up an nfsiod process. This is an awful hack, because when running - * as a module, we will keep insmod's memory. Besides, the current->comm - * hack won't work in this case - * The best would be to have a syscall for nfs client control that (among - * other things) forks biod's. - * Alternatively, we might want to have the idle task spawn biod's on demand. + * This function is called whenever some part of NFS notices that + * the cached attributes have to be refreshed. + * + * This is a bit tricky because we have to make sure all dirty pages + * have been sent off to the server before calling invalidate_inode_pages. + * To make sure no other process adds more write requests while we try + * our best to flush them, we make them sleep during the attribute refresh. 
+ * + * A very similar scenario holds for the dir cache. */ -static int run_nfsiod(void *dummy) +int +_nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - int ret; + struct nfs_fattr fattr; + int status; + + if (jiffies - NFS_READTIME(inode) < NFS_ATTRTIMEO(inode)) + return 0; + + dfprintk(PAGECACHE, "NFS: revalidating %x/%ld inode\n", + inode->i_dev, inode->i_ino); + NFS_READTIME(inode) = jiffies; + if ((status = nfs_proc_getattr(server, NFS_FH(inode), &fattr)) < 0) + goto done; + + nfs_refresh_inode(inode, &fattr); + if (fattr.mtime.seconds != NFS_OLDMTIME(inode)) { + if (!S_ISDIR(inode->i_mode)) { + /* This sends off all dirty pages off to the server. + * Note that this function must not sleep. */ + nfs_invalidate_pages(inode); + invalidate_inode_pages(inode); + } else { + nfs_invalidate_dircache(inode); + } -#ifdef __SMP__ - lock_kernel(); - syscall_count++; -#endif + NFS_OLDMTIME(inode) = fattr.mtime.seconds; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + } else { + /* Update attrtimeo value */ + if ((NFS_ATTRTIMEO(inode) <<= 1) > NFS_MAXATTRTIMEO(inode)) + NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode); + } + status = 0; - MOD_INC_USE_COUNT; - exit_mm(current); - current->session = 1; - current->pgrp = 1; - sprintf(current->comm, "nfsiod"); - ret = nfsiod(); - MOD_DEC_USE_COUNT; - return ret; +done: + dfprintk(PAGECACHE, + "NFS: inode %x/%ld revalidation complete (status %d).\n", + inode->i_dev, inode->i_ino, status); + return status; } -int init_nfs_fs(void) +/* + * File system information + */ +static struct file_system_type nfs_fs_type = { + nfs_read_super, "nfs", 0, NULL +}; + +/* + * Initialize NFS + */ +int +init_nfs_fs(void) { - /* Fork four biod's */ - kernel_thread(run_nfsiod, NULL, 0); - kernel_thread(run_nfsiod, NULL, 0); - kernel_thread(run_nfsiod, NULL, 0); - kernel_thread(run_nfsiod, NULL, 0); +#ifdef CONFIG_PROC_FS + rpc_proc_register(&nfs_rpcstat); +#endif return register_filesystem(&nfs_fs_type); } +/* + * Every 
kernel module contains stuff like this. + */ #ifdef MODULE -int init_module(void) -{ - int status; - if ((status = init_nfs_fs()) == 0) - register_symtab(0); - return status; +EXPORT_NO_SYMBOLS; +/* Not quite true; I just maintain it */ +MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); + +int +init_module(void) +{ + return init_nfs_fs(); } -void cleanup_module(void) +void +cleanup_module(void) { +#ifdef CONFIG_PROC_FS + rpc_proc_unregister("nfs"); +#endif unregister_filesystem(&nfs_fs_type); - nfs_kfree_cache(); + nfs_free_dircache(); } - #endif diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c new file mode 100644 index 000000000..81da8f996 --- /dev/null +++ b/fs/nfs/mount_clnt.c @@ -0,0 +1,141 @@ +/* + * linux/fs/nfs/mount_clnt.c + * + * MOUNT client to support NFSroot. + * + * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/uio.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/sched.h> +#include <linux/nfs_fs.h> + +#ifdef RPC_DEBUG +# define NFSDBG_FACILITY NFSDBG_ROOT +#endif + +/* +#define MOUNT_PROGRAM 100005 +#define MOUNT_VERSION 1 +#define MOUNT_MNT 1 +#define MOUNT_UMNT 3 + */ + +static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *); +extern struct rpc_program mnt_program; + +struct mnt_fhstatus { + unsigned int status; + struct nfs_fh * fh; +}; + +/* + * Obtain an NFS file handle for the given host and path + */ +int +nfs_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh) +{ + struct rpc_clnt *mnt_clnt; + struct mnt_fhstatus result = { 0, fh }; + char hostname[32]; + int status; + + dprintk("NFS: nfs_mount(%08x:%s)\n", + (unsigned)ntohl(addr->sin_addr.s_addr), path); + + strcpy(hostname, in_ntoa(addr->sin_addr.s_addr)); + if (!(mnt_clnt = mnt_create(hostname, addr))) + return -EACCES; 
+ + status = rpc_call(mnt_clnt, NFS_MNTPROC_MNT, path, &result, 0); + return status < 0? status : (result.status? -EACCES : 0); +} + +static struct rpc_clnt * +mnt_create(char *hostname, struct sockaddr_in *srvaddr) +{ + struct rpc_xprt *xprt; + struct rpc_clnt *clnt; + + if (!(xprt = xprt_create_proto(IPPROTO_UDP, srvaddr, NULL))) + return NULL; + + clnt = rpc_create_client(xprt, hostname, + &mnt_program, NFS_MNT_VERSION, + RPC_AUTH_NULL); + if (!clnt) { + xprt_destroy(xprt); + } else { + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + clnt->cl_oneshot = 1; + } + return clnt; +} + +/* + * XDR encode/decode functions for MOUNT + */ +static int +xdr_error(struct rpc_rqst *req, u32 *p, void *dummy) +{ + return -EIO; +} + +static int +xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path) +{ + p = xdr_encode_string(p, path); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res) +{ + if ((res->status = ntohl(*p++)) == 0) + memcpy(res->fh, p, sizeof(*res->fh)); + return 0; +} + +#define MNT_dirpath_sz (1 + 256) +#define MNT_fhstatus_sz (1 + 8) + +static struct rpc_procinfo mnt_procedures[2] = { + { "mnt_null", + (kxdrproc_t) xdr_error, + (kxdrproc_t) xdr_error, 0, 0 }, + { "mnt_mount", + (kxdrproc_t) xdr_encode_dirpath, + (kxdrproc_t) xdr_decode_fhstatus, + MNT_dirpath_sz, MNT_fhstatus_sz }, +}; + +static struct rpc_version mnt_version1 = { + 1, 2, mnt_procedures +}; + +static struct rpc_version * mnt_version[] = { + NULL, + &mnt_version1, +}; + +static struct rpc_stat mnt_stats; + +struct rpc_program mnt_program = { + "mount", + NFS_MNT_PROGRAM, + sizeof(mnt_version)/sizeof(mnt_version[0]), + mnt_version, + &mnt_stats, +}; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c new file mode 100644 index 000000000..5eec5eb65 --- /dev/null +++ b/fs/nfs/nfs2xdr.c @@ -0,0 +1,635 @@ +/* + * linux/fs/nfs/xdr.c + * + * XDR functions to encode/decode NFS RPC 
arguments and results. + * + * Copyright (C) 1992, 1993, 1994 Rick Sladkey + * Copyright (C) 1996 Olaf Kirch + */ + +#define NFS_NEED_XDR_TYPES + +#include <linux/param.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> + +#define NFSDBG_FACILITY NFSDBG_XDR + +#define QUADLEN(len) (((len) + 3) >> 2) +static int nfs_stat_to_errno(int stat); + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +/* + * Declare the space requirements for NFS arguments and replies as + * number of 32bit-words + */ +#define NFS_fhandle_sz 8 +#define NFS_sattr_sz 8 +#define NFS_filename_sz 1+(NFS_MAXNAMLEN>>2) +#define NFS_path_sz 1+(NFS_MAXPATHLEN>>2) +#define NFS_fattr_sz 17 +#define NFS_info_sz 5 +#define NFS_entry_sz NFS_filename_sz+3 + +#define NFS_enc_void_sz 0 +#define NFS_diropargs_sz NFS_fhandle_sz+NFS_filename_sz +#define NFS_sattrargs_sz NFS_fhandle_sz+NFS_sattr_sz +#define NFS_readargs_sz NFS_fhandle_sz+3 +#define NFS_writeargs_sz NFS_fhandle_sz+4 +#define NFS_createargs_sz NFS_diropargs_sz+NFS_sattr_sz +#define NFS_renameargs_sz NFS_diropargs_sz+NFS_diropargs_sz +#define NFS_linkargs_sz NFS_fhandle_sz+NFS_diropargs_sz +#define NFS_symlinkargs_sz NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz +#define NFS_readdirargs_sz NFS_fhandle_sz+2 + +#define NFS_dec_void_sz 0 +#define NFS_attrstat_sz 1+NFS_fattr_sz +#define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz +#define NFS_readlinkres_sz 1+NFS_path_sz +#define NFS_readres_sz 1+NFS_fattr_sz+1 +#define NFS_stat_sz 1 +#define NFS_readdirres_sz 1 +#define NFS_statfsres_sz 1+NFS_info_sz + +/* + * Common NFS XDR functions as inlines + */ +static inline u32 * +xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle) +{ + *((struct nfs_fh *) p) = *fhandle; + return p 
+ QUADLEN(sizeof(*fhandle)); +} + +static inline u32 * +xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle) +{ + *fhandle = *((struct nfs_fh *) p); + return p + QUADLEN(sizeof(*fhandle)); +} + +static inline u32 * +xdr_decode_string2(u32 *p, char **string, unsigned int *len, + unsigned int maxlen) +{ + *len = ntohl(*p++); + if (*len > maxlen) + return NULL; + *string = (char *) p; + return p + QUADLEN(*len); +} + +static inline u32 * +xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) +{ + fattr->type = (enum nfs_ftype) ntohl(*p++); + fattr->mode = ntohl(*p++); + fattr->nlink = ntohl(*p++); + fattr->uid = ntohl(*p++); + fattr->gid = ntohl(*p++); + fattr->size = ntohl(*p++); + fattr->blocksize = ntohl(*p++); + fattr->rdev = ntohl(*p++); + fattr->blocks = ntohl(*p++); + fattr->fsid = ntohl(*p++); + fattr->fileid = ntohl(*p++); + fattr->atime.seconds = ntohl(*p++); + fattr->atime.useconds = ntohl(*p++); + fattr->mtime.seconds = ntohl(*p++); + fattr->mtime.useconds = ntohl(*p++); + fattr->ctime.seconds = ntohl(*p++); + fattr->ctime.useconds = ntohl(*p++); + return p; +} + +static inline u32 * +xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr) +{ + *p++ = htonl(sattr->mode); + *p++ = htonl(sattr->uid); + *p++ = htonl(sattr->gid); + *p++ = htonl(sattr->size); + *p++ = htonl(sattr->atime.seconds); + *p++ = htonl(sattr->atime.useconds); + *p++ = htonl(sattr->mtime.seconds); + *p++ = htonl(sattr->mtime.useconds); + return p; +} + +/* + * NFS encode functions + */ +/* + * Encode void argument + */ +static int +nfs_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy) +{ + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode file handle argument + * GETATTR, READLINK, STATFS + */ +static int +nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh) +{ + p = xdr_encode_fhandle(p, fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SETATTR arguments + */ +static int +nfs_xdr_sattrargs(struct rpc_rqst 
*req, u32 *p, struct nfs_sattrargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode directory ops argument + * LOOKUP, REMOVE, RMDIR + */ +static int +nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_string(p, args->name); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Arguments to a READ call. Since we read data directly into the page + * cache, we also set up the reply iovec here so that iov[1] points + * exactly to the page wewant to fetch. + */ +static int +nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen, buflen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->offset); + *p++ = htonl(args->count); + *p++ = htonl(args->count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + +#if 1 + /* set up reply iovec */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; + buflen = req->rq_rvec[0].iov_len; + req->rq_rvec[0].iov_len = replen; + req->rq_rvec[1].iov_base = args->buffer; + req->rq_rvec[1].iov_len = args->count; + req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[2].iov_len = buflen - replen; + req->rq_rlen = args->count + buflen; + req->rq_rnr = 3; +#else + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; + req->rq_rvec[0].iov_len = replen; +#endif + + return 0; +} + +/* + * Decode READ reply + */ +static int +nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) +{ + struct iovec *iov = req->rq_rvec; + int status, count, recvd, hdrlen; + + dprintk("RPC: readres OK status %lx\n", (long)ntohl(*p)); + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + p = xdr_decode_fattr(p, res->fattr); + + count = ntohl(*p++); + 
hdrlen = (u8 *) p - (u8 *) iov->iov_base; + recvd = req->rq_rlen - hdrlen; + if (p != iov[2].iov_base) { + /* Unexpected reply header size. Punt. + * XXX: Move iovec contents to align data on page + * boundary and adjust RPC header size guess */ + printk("NFS: Odd RPC header size in read reply: %d\n", hdrlen); + return -errno_NFSERR_IO; + } + if (count > recvd) { + printk("NFS: server cheating in read reply: " + "count %d > recvd %d\n", count, recvd); + count = recvd; + } + + dprintk("RPC: readres OK count %d\n", count); + if (count < res->count) + memset((u8 *)(iov[1].iov_base+count), 0, res->count-count); + + return count; +} + + +/* + * Write arguments. Splice the buffer to be written into the iovec. + */ +static int +nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->offset); + *p++ = htonl(args->offset); + *p++ = htonl(args->count); + *p++ = htonl(args->count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + req->rq_svec[1].iov_base = (void *) args->buffer; + req->rq_svec[1].iov_len = args->count; + req->rq_slen += args->count; + req->rq_snr = 2; + + return 0; +} + +/* + * Encode create arguments + * CREATE, MKDIR + */ +static int +nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_string(p, args->name); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode RENAME arguments + */ +static int +nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_string(p, args->fromname); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_string(p, args->toname); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode LINK arguments + */ +static int +nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct 
nfs_linkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_string(p, args->toname); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SYMLINK arguments + */ +static int +nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_string(p, args->fromname); + p = xdr_encode_string(p, args->topath); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode arguments to readdir call + */ +static int +nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->cookie); + *p++ = htonl(args->bufsiz); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* set up reply iovec */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; + /* + dprintk("RPC: readdirargs: slack is 4 * (%d + %d + %d) = %d\n", + RPC_REPHDRSIZE, auth->au_rslack, NFS_readdirres_sz, replen); + */ + req->rq_rvec[0].iov_len = replen; + req->rq_rvec[1].iov_base = args->buffer; + req->rq_rvec[1].iov_len = args->bufsiz; + req->rq_rlen = replen + args->bufsiz; + req->rq_rnr = 2; + + /* + dprintk("RPC: readdirargs set up reply vec:\n"); + dprintk(" rvec[0] = %p/%d\n", + req->rq_rvec[0].iov_base, + req->rq_rvec[0].iov_len); + dprintk(" rvec[1] = %p/%d\n", + req->rq_rvec[1].iov_base, + req->rq_rvec[1].iov_len); + */ + + return 0; +} + +/* + * Decode the result of a readdir call. We decode the result in place + * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name. + * After decoding, the layout in memory looks like this: + * entry1 entry2 ... entryN <space> stringN ... 
string2 string1 + * Note that the strings are not null-terminated so that the entire number + * of entries returned by the server should fit into the buffer. + */ +static int +nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) +{ + struct nfs_entry *entry; + struct iovec *iov = req->rq_rvec; + int status, nr, len; + char *string; + u32 *end; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + if ((void *) p != ((u8 *) iov->iov_base+iov->iov_len)) { + /* Unexpected reply header size. Punt. */ + printk("NFS: Odd RPC header size in readdirres reply\n"); + return -errno_NFSERR_IO; + } + + /* Get start and end address of XDR data */ + p = (u32 *) iov[1].iov_base; + end = (u32 *) ((u8 *) p + iov[1].iov_len); + + /* Get start and end of dirent buffer */ + entry = (struct nfs_entry *) res->buffer; + string = (char *) res->buffer + res->bufsiz; + for (nr = 0; *p++; nr++, entry++) { + entry->fileid = ntohl(*p++); + + len = ntohl(*p++); + if ((p + QUADLEN(len) + 3) > end) { + printk(KERN_NOTICE + "NFS: short packet in readdir reply!\n"); + break; + } + if (len > NFS_MAXNAMLEN) { + printk("NFS: giant filename in readdir (len %x)!\n", + len); + return -errno_NFSERR_IO; + } + string -= len; + if ((void *) (entry+1) > (void *) string) { + /* This may actually happen because an nfs_entry + * will take up more space than the XDR data. On + * 32bit machines that's due to 8byte alignment, + * on 64bit machines that's because the char * takes + * up 2 longs. + * + * THIS IS BAD! 
+ */ + printk(KERN_NOTICE "NFS: should not happen in %s!\n", + __FUNCTION__); + break; + } + + entry->name = string; + entry->length = len; + memmove(string, p, len); + p += QUADLEN(len); + entry->cookie = ntohl(*p++); + entry->eof = !p[0] && p[1]; + } + return nr; +} + +/* + * NFS XDR decode functions + */ +/* + * Decode void reply + */ +static int +nfs_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy) +{ + return 0; +} + +/* + * Decode simple status reply + */ +static int +nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = -nfs_stat_to_errno(status); + return status; +} + +/* + * Decode attrstat reply + * GETATTR, SETATTR, WRITE + */ +static int +nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status; + + dprintk("RPC: attrstat status %lx\n", (long)ntohl(*p)); + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + xdr_decode_fattr(p, fattr); + dprintk("RPC: attrstat OK type %d mode %o dev %x ino %x\n", + fattr->type, fattr->mode, fattr->fsid, fattr->fileid); + return 0; +} + +/* + * Decode diropres reply + * LOOKUP, CREATE, MKDIR + */ +static int +nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res) +{ + int status; + + dprintk("RPC: diropres status %lx\n", (long)ntohl(*p)); + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + p = xdr_decode_fhandle(p, res->fh); + xdr_decode_fattr(p, res->fattr); + dprintk("RPC: diropres OK type %x mode %o dev %x ino %x\n", + res->fattr->type, res->fattr->mode, + res->fattr->fsid, res->fattr->fileid); + return 0; +} + +/* + * Decode READLINK reply + */ +static int +nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + xdr_decode_string2(p, res->string, res->lenp, res->maxlen); + + /* Caller takes over the buffer here to avoid extra copy */ + res->buffer = 
req->rq_task->tk_buffer; + req->rq_task->tk_buffer = NULL; + return 0; +} + +/* + * Decode STATFS reply + */ +static int +nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + res->tsize = ntohl(*p++); + res->bsize = ntohl(*p++); + res->blocks = ntohl(*p++); + res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); + return 0; +} + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, EPERM }, + { NFSERR_NOENT, ENOENT }, + { NFSERR_IO, errno_NFSERR_IO }, + { NFSERR_NXIO, ENXIO }, + { NFSERR_EAGAIN, EAGAIN }, + { NFSERR_ACCES, EACCES }, + { NFSERR_EXIST, EEXIST }, + { NFSERR_NODEV, ENODEV }, + { NFSERR_NOTDIR, ENOTDIR }, + { NFSERR_ISDIR, EISDIR }, + { NFSERR_INVAL, EINVAL }, + { NFSERR_FBIG, EFBIG }, + { NFSERR_NOSPC, ENOSPC }, + { NFSERR_ROFS, EROFS }, + { NFSERR_NAMETOOLONG, ENAMETOOLONG }, + { NFSERR_NOTEMPTY, ENOTEMPTY }, + { NFSERR_DQUOT, EDQUOT }, + { NFSERR_STALE, ESTALE }, +#ifdef EWFLUSH + { NFSERR_WFLUSH, EWFLUSH }, +#endif + { -1, EIO } +}; + +static int +nfs_stat_to_errno(int stat) +{ + int i; + + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + return nfs_errtbl[i].errno; +} + +#ifndef MAX +# define MAX(a, b) (((a) > (b))? 
(a) : (b)) +#endif + +#define PROC(proc, argtype, restype) \ + { "nfs_" #proc, \ + (kxdrproc_t) nfs_xdr_##argtype, \ + (kxdrproc_t) nfs_xdr_##restype, \ + MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2 \ + } + +static struct rpc_procinfo nfs_procedures[18] = { + PROC(null, enc_void, dec_void), + PROC(getattr, fhandle, attrstat), + PROC(setattr, sattrargs, attrstat), + PROC(root, enc_void, dec_void), + PROC(lookup, diropargs, diropres), + PROC(readlink, fhandle, readlinkres), + PROC(read, readargs, readres), + PROC(writecache, enc_void, dec_void), + PROC(write, writeargs, attrstat), + PROC(create, createargs, diropres), + PROC(remove, diropargs, stat), + PROC(rename, renameargs, stat), + PROC(link, linkargs, stat), + PROC(symlink, symlinkargs, stat), + PROC(mkdir, createargs, diropres), + PROC(rmdir, diropargs, stat), + PROC(readdir, readdirargs, readdirres), + PROC(statfs, fhandle, statfsres), +}; + +static struct rpc_version nfs_version2 = { + 2, + sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), + nfs_procedures +}; + +static struct rpc_version * nfs_version[] = { + NULL, + NULL, + &nfs_version2 +}; + +struct rpc_program nfs_program = { + "nfs", + NFS_PROGRAM, + sizeof(nfs_version) / sizeof(nfs_version[0]), + nfs_version, + &nfs_rpcstat, +}; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c new file mode 100644 index 000000000..f48a6217c --- /dev/null +++ b/fs/nfs/nfs3xdr.c @@ -0,0 +1,669 @@ +/* + * linux/fs/nfs/nfs2xdr.c + * + * XDR functions to encode/decode NFSv3 RPC arguments and results. + * Note: this is incomplete! 
+ * + * Copyright (C) 1996 Olaf Kirch + */ + +#define NFS_NEED_XDR_TYPES + +#include <linux/param.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/nfs_fs.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/sunrpc/clnt.h> + +#ifdef RPC_DEBUG +# define RPC_FACILITY RPCDBG_NFS +#endif + +#define QUADLEN(len) (((len) + 3) >> 2) +static int nfs_stat_to_errno(int stat); + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +/* + * Declare the space requirements for NFS arguments and replies as + * number of 32bit-words + */ +#define NFS_fhandle_sz (1+16) +#define NFS_sattr_sz 8 +#define NFS_filename_sz 1+(NFS_MAXNAMLEN>>2) +#define NFS_path_sz 1+(NFS_MAXPATHLEN>>2) +#define NFS_fattr_sz 17 +#define NFS_info_sz 5 +#define NFS_entry_sz NFS_filename_sz+3 + +#define NFS_enc_void_sz 0 +#define NFS_diropargs_sz NFS_fhandle_sz+NFS_filename_sz +#define NFS_sattrargs_sz NFS_fhandle_sz+NFS_sattr_sz +#define NFS_readargs_sz NFS_fhandle_sz+3 +#define NFS_writeargs_sz NFS_fhandle_sz+4 +#define NFS_createargs_sz NFS_diropargs_sz+NFS_sattr_sz +#define NFS_renameargs_sz NFS_diropargs_sz+NFS_diropargs_sz +#define NFS_linkargs_sz NFS_fhandle_sz+NFS_diropargs_sz +#define NFS_symlinkargs_sz NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz +#define NFS_readdirargs_sz NFS_fhandle_sz+2 + +#define NFS_dec_void_sz 0 +#define NFS_attrstat_sz 1+NFS_fattr_sz +#define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz +#define NFS_readlinkres_sz 1+NFS_path_sz +#define NFS_readres_sz 1+NFS_fattr_sz+1 +#define NFS_stat_sz 1 +#define NFS_readdirres_sz 1 +#define NFS_statfsres_sz 1+NFS_info_sz + +/* + * Common NFS XDR functions as inlines + */ +static inline u32 * +xdr_encode_fhandle(u32 *p, struct nfs3_fh *fh) +{ + *p++ = htonl(fh->size); + memcpy(p, fh->data, fh->size); + return p + QUADLEN(fh->size); +} 
+ +static inline u32 * +xdr_decode_fhandle(u32 *p, struct nfs3_fh *fh) +{ + if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) { + memcpy(fh->data, p, fh->size); + return p + QUADLEN(fh->size); + } + return NULL; +} + +static inline enum nfs_ftype +xdr_decode_ftype(u32 type) +{ + return (type == NF3FIFO)? NFFIFO : (enum nfs_ftype) type; +} + +static inline u32 * +xdr_decode_string2(u32 *p, char **string, unsigned int *len, + unsigned int maxlen) +{ + *len = ntohl(*p++); + if (*len > maxlen) + return NULL; + *string = (char *) p; + return p + QUADLEN(*len); +} + +static inline u32 * +xdr_decode_fattr(u32 *p, struct nfs3_fattr *fattr) +{ + fattr->type = xdr_decode_ftype(ntohl(*p++)); + fattr->mode = ntohl(*p++); + fattr->nlink = ntohl(*p++); + fattr->uid = ntohl(*p++); + fattr->gid = ntohl(*p++); + fattr->size = ((u64) ntohl(*p++) << 32) | ntohl(*p++); + fattr->used = ((u64) ntohl(*p++) << 32) | ntohl(*p++); + fattr->rdev_maj = ntohl(*p++); + fattr->rdev_min = ntohl(*p++); + fattr->fsid = ntohl(*p++); + fattr->fileid = ntohl(*p++); + fattr->atime.seconds = ntohl(*p++); + fattr->atime.useconds = ntohl(*p++); + fattr->mtime.seconds = ntohl(*p++); + fattr->mtime.useconds = ntohl(*p++); + fattr->ctime.seconds = ntohl(*p++); + fattr->ctime.useconds = ntohl(*p++); + return p; +} + +static inline u32 * +xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr) +{ + *p++ = htonl(sattr->mode); + *p++ = htonl(sattr->uid); + *p++ = htonl(sattr->gid); + *p++ = htonl(sattr->size >> 32); + *p++ = htonl(sattr->size & 0xFFFFFFFF); + *p++ = htonl(sattr->atime.seconds); + *p++ = htonl(sattr->atime.useconds); + *p++ = htonl(sattr->mtime.seconds); + *p++ = htonl(sattr->mtime.useconds); + return p; +} + +/* + * NFS encode functions + */ +/* + * Encode void argument + */ +static int +nfs_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy) +{ + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode file handle argument + * GETATTR, READLINK, STATFS + */ +static int 
+nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs3_fh *fh) +{ + p = xdr_encode_fhandle(p, fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SETATTR arguments + */ +static int +nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode directory ops argument + * LOOKUP, REMOVE, RMDIR + */ +static int +nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_string(p, args->name); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Arguments to a READ call. Since we read data directly into the page + * cache, we also set up the reply iovec here so that iov[1] points + * exactly to the page wewant to fetch. + */ +static int +nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen, buflen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->offset); + *p++ = htonl(args->count); + *p++ = htonl(args->count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + +#if 1 + /* set up reply iovec */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; + buflen = req->rq_rvec[0].iov_len; + req->rq_rvec[0].iov_len = replen; + req->rq_rvec[1].iov_base = args->buffer; + req->rq_rvec[1].iov_len = args->count; + req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[2].iov_len = buflen - replen; + req->rq_rlen = args->count + buflen; + req->rq_rnr = 3; +#else + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; + req->rq_rvec[0].iov_len = replen; +#endif + + return 0; +} + +/* + * Decode READ reply + */ +static int +nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) +{ + struct iovec 
*iov = req->rq_rvec; + int status, count, recvd, hdrlen; + + dprintk("RPC: readres OK status %lx\n", ntohl(*p)); + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + p = xdr_decode_fattr(p, res->fattr); + + count = ntohl(*p++); + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + recvd = req->rq_rlen - hdrlen; + if (p != iov[2].iov_base) { + /* Unexpected reply header size. Punt. + * XXX: Move iovec contents to align data on page + * boundary and adjust RPC header size guess */ + printk("NFS: Odd RPC header size in read reply: %d\n", hdrlen); + return -errno_NFSERR_IO; + } + if (count > recvd) { + printk("NFS: server cheating in read reply: " + "count %d > recvd %d\n", count, recvd); + count = recvd; + } + + dprintk("RPC: readres OK count %d\n", count); + if (count < res->count) + memset((u8 *)(iov[1].iov_base+count), 0, res->count-count); + + return count; +} + + +/* + * Write arguments. Splice the buffer to be written into the iovec. + */ +static int +nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->offset); + *p++ = htonl(args->offset); + *p++ = htonl(args->count); + *p++ = htonl(args->count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + req->rq_svec[1].iov_base = (void *) args->buffer; + req->rq_svec[1].iov_len = args->count; + req->rq_slen += args->count; + req->rq_snr = 2; + + return 0; +} + +/* + * Encode create arguments + * CREATE, MKDIR + */ +static int +nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_string(p, args->name); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode RENAME arguments + */ +static int +nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_string(p, args->fromname); + p = 
xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_string(p, args->toname); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode LINK arguments + */ +static int +nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_string(p, args->toname); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SYMLINK arguments + */ +static int +nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_string(p, args->fromname); + p = xdr_encode_string(p, args->topath); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode arguments to readdir call + */ +static int +nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->cookie); + *p++ = htonl(args->bufsiz); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* set up reply iovec */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; + /* + dprintk("RPC: readdirargs: slack is 4 * (%d + %d + %d) = %d\n", + RPC_REPHDRSIZE, auth->au_rslack, NFS_readdirres_sz, replen); + */ + req->rq_rvec[0].iov_len = replen; + req->rq_rvec[1].iov_base = args->buffer; + req->rq_rvec[1].iov_len = args->bufsiz; + req->rq_rlen = replen + args->bufsiz; + req->rq_rnr = 2; + + /* + dprintk("RPC: readdirargs set up reply vec:\n"); + dprintk(" rvec[0] = %p/%d\n", + req->rq_rvec[0].iov_base, + req->rq_rvec[0].iov_len); + dprintk(" rvec[1] = %p/%d\n", + req->rq_rvec[1].iov_base, + req->rq_rvec[1].iov_len); + */ + + return 0; +} + +/* + * Decode the result of a readdir call. 
We decode the result in place + * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name. + * After decoding, the layout in memory looks like this: + * entry1 entry2 ... entryN <space> stringN ... string2 string1 + * Note that the strings are not null-terminated so that the entire number + * of entries returned by the server should fit into the buffer. + */ +static int +nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) +{ + struct nfs_entry *entry; + struct iovec *iov = req->rq_rvec; + int status, nr, len; + char *string; + u32 *end; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + if ((void *) p != ((u8 *) iov->iov_base+iov->iov_len)) { + /* Unexpected reply header size. Punt. */ + printk("NFS: Odd RPC header size in readdirres reply\n"); + return -errno_NFSERR_IO; + } + + p = (u32 *) iov[1].iov_base; + end = (u32 *) ((u8 *) p + iov[1].iov_len); + + if (p != res->buffer) { + printk("NFS: p != res->buffer in %s:%d!!!\n", + __FILE__, __LINE__); + return -errno_NFSERR_IO; + } + + string = (char *) res->buffer + res->bufsiz; + entry = (struct nfs_entry *) res->buffer; + for (nr = 0; *p++; nr++, entry++) { + entry->fileid = ntohl(*p++); + + len = ntohl(*p++); + if ((p + QUADLEN(len) + 3) > end) { + printk(KERN_NOTICE + "NFS: short packet in readdir reply!\n"); + break; + } + if (len > NFS_MAXNAMLEN) { + printk("NFS: giant filename in readdir (len %x)!\n", + len); + return -errno_NFSERR_IO; + } + string -= len; + if ((void *) (entry+1) > (void *) string) { + dprintk("NFS: shouldnothappen in readdirres_decode!\n"); + break; /* should not happen */ + } + + entry->name = string; + entry->length = len; + memmove(string, p, len); + p += QUADLEN(len); + entry->cookie = ntohl(*p++); + entry->eof = !p[0] && p[1]; + /* + dprintk("NFS: decoded dirent %.*s cookie %d eof %d\n", + len, string, entry->cookie, entry->eof); + */ + } + return nr; +} + +/* + * NFS XDR decode functions + */ +/* + * Decode void reply + */ +static int 
+nfs_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy) +{ + return 0; +} + +/* + * Decode simple status reply + */ +static int +nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = -nfs_stat_to_errno(status); + return status; +} + +/* + * Decode attrstat reply + * GETATTR, SETATTR, WRITE + */ +static int +nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status; + + dprintk("RPC: attrstat status %lx\n", ntohl(*p)); + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + xdr_decode_fattr(p, fattr); + dprintk("RPC: attrstat OK type %d mode %o dev %x ino %x\n", + fattr->type, fattr->mode, fattr->fsid, fattr->fileid); + return 0; +} + +/* + * Decode diropres reply + * LOOKUP, CREATE, MKDIR + */ +static int +nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res) +{ + int status; + + dprintk("RPC: diropres status %lx\n", ntohl(*p)); + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + p = xdr_decode_fhandle(p, res->fh); + xdr_decode_fattr(p, res->fattr); + dprintk("RPC: diropres OK type %x mode %o dev %x ino %x\n", + res->fattr->type, res->fattr->mode, + res->fattr->fsid, res->fattr->fileid); + return 0; +} + +/* + * Decode READLINK reply + */ +static int +nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + xdr_decode_string2(p, res->string, res->lenp, res->maxlen); + + /* Caller takes over the buffer here to avoid extra copy */ + res->buffer = req->rq_task->tk_buffer; + req->rq_task->tk_buffer = NULL; + return 0; +} + +/* + * Decode STATFS reply + */ +static int +nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + res->tsize = ntohl(*p++); + res->bsize = ntohl(*p++); + res->blocks = ntohl(*p++); + 
res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); + return 0; +} + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, EPERM }, + { NFSERR_NOENT, ENOENT }, + { NFSERR_IO, errno_NFSERR_IO }, + { NFSERR_NXIO, ENXIO }, + { NFSERR_EAGAIN, EAGAIN }, + { NFSERR_ACCES, EACCES }, + { NFSERR_EXIST, EEXIST }, + { NFSERR_NODEV, ENODEV }, + { NFSERR_NOTDIR, ENOTDIR }, + { NFSERR_ISDIR, EISDIR }, + { NFSERR_INVAL, EINVAL }, + { NFSERR_FBIG, EFBIG }, + { NFSERR_NOSPC, ENOSPC }, + { NFSERR_ROFS, EROFS }, + { NFSERR_NAMETOOLONG, ENAMETOOLONG }, + { NFSERR_NOTEMPTY, ENOTEMPTY }, + { NFSERR_DQUOT, EDQUOT }, + { NFSERR_STALE, ESTALE }, +#ifdef EWFLUSH + { NFSERR_WFLUSH, EWFLUSH }, +#endif + { -1, EIO } +}; + +static int +nfs_stat_to_errno(int stat) +{ + int i; + + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + return nfs_errtbl[i].errno; +} + +#ifndef MAX +# define MAX(a, b) (((a) > (b))? 
(a) : (b)) +#endif + +#define PROC(proc, argtype, restype) \ + { "nfs_" #proc, \ + (kxdrproc_t) nfs_xdr_##argtype, \ + (kxdrproc_t) nfs_xdr_##restype, \ + MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2 \ + } + +static struct rpc_procinfo nfs_procedures[18] = { + PROC(null, enc_void, dec_void), + PROC(getattr, fhandle, attrstat), + PROC(setattr, sattrargs, attrstat), + PROC(root, enc_void, dec_void), + PROC(lookup, diropargs, diropres), + PROC(readlink, fhandle, readlinkres), + PROC(read, readargs, readres), + PROC(writecache, enc_void, dec_void), + PROC(write, writeargs, attrstat), + PROC(create, createargs, diropres), + PROC(remove, diropargs, stat), + PROC(rename, renameargs, stat), + PROC(link, linkargs, stat), + PROC(symlink, symlinkargs, stat), + PROC(mkdir, createargs, diropres), + PROC(rmdir, diropargs, stat), + PROC(readdir, readdirargs, readdirres), + PROC(statfs, fhandle, statfsres), +}; + +static struct rpc_version nfs_version2 = { + 2, + sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), + nfs_procedures +}; + +static struct rpc_version * nfs_version[] = { + NULL, + NULL, + &nfs_version2 +}; + +struct rpc_program nfs_program = { + "nfs", + NFS_PROGRAM, + sizeof(nfs_version) / sizeof(nfs_version[0]), + nfs_version, + &nfs_rpcstat, +}; + +/* + * RPC stats support + */ +static int +nfs_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + return rpcstat_get_info(&nfs_rpcstat, buffer, start, offset, length); +} + +static struct proc_dir_entry proc_nfsclnt = { + 0, 3, "nfs", + S_IFREG | S_IRUGO, 1, 0, 0, + 6, &proc_net_inode_operations, + nfs_get_info +}; + +struct rpc_stat nfs_rpcstat = { + NULL, /* next */ + &proc_nfsclnt, /* /proc/net directory entry */ + &nfs_program, /* RPC program */ +}; diff --git a/fs/nfs/nfsiod.c b/fs/nfs/nfsiod.c deleted file mode 100644 index 167c5b501..000000000 --- a/fs/nfs/nfsiod.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * linux/fs/nfs/nfsiod.c - * - * Async NFS RPC call support. 
- * - * When a process wants to place an asynchronous RPC call, it reserves - * an nfsiod slot, fills in all necessary fields including the callback - * handler field, and enqueues the request. - * - * This will wake up nfsiod, which calls nfs_rpc_doio to collect the - * reply. It then dispatches the result to the caller via the callback - * function, including result value and request pointer. It then re-inserts - * itself into the free list. - * - * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> - */ - -#include <linux/sched.h> -#include <linux/nfs_fs.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/rpcsock.h> -#include <linux/nfsiod.h> - -static struct nfsiod_req * free_list = NULL; -static int active = 0; - -#undef DEBUG_NFSIOD -#ifdef DEBUG_NFSIOD -#define dprintk(args...) printk(## args) -#else -#define dprintk(args...) /* nothing */ -#endif - - -/* - * Reserve an nfsiod slot and initialize the request struct - */ -struct nfsiod_req * -nfsiod_reserve(struct nfs_server *server) -{ - struct nfsiod_req *req; - - if (!(req = free_list)) { - dprintk("BIO: nfsiod_reserve: no free nfsiods\n"); - return NULL; - } - free_list = req->rq_next; - memset(&req->rq_rpcreq, 0, sizeof(struct rpc_ioreq)); - - if (rpc_reserve(server->rsock, &req->rq_rpcreq, 1) < 0) { - dprintk("BIO: nfsiod_reserve failed to reserve RPC slot\n"); - req->rq_next = free_list; - free_list = req; - return NULL; - } - - req->rq_server = server; - return req; -} - -void -nfsiod_release(struct nfsiod_req *req) -{ - dprintk("BIO: nfsiod_release called\n"); - rpc_release(req->rq_server->rsock, &req->rq_rpcreq); - memset(&req->rq_rpcreq, 0, sizeof(struct rpc_ioreq)); - req->rq_next = free_list; - free_list = req; -} - -/* - * Transmit a request and put it on nfsiod's list of pending requests. 
- */ -void -nfsiod_enqueue(struct nfsiod_req *req) -{ - dprintk("BIO: enqueuing request %p\n", &req->rq_rpcreq); - wake_up(&req->rq_wait); - schedule(); -} - -/* - * This is the main nfsiod loop. - */ -int -nfsiod(void) -{ - struct nfsiod_req request, *req = &request; - int result; - - dprintk("BIO: nfsiod %d starting\n", current->pid); - while (1) { - /* Insert request into free list */ - memset(req, 0, sizeof(*req)); - req->rq_next = free_list; - free_list = req; - - /* Wait until user enqueues request */ - dprintk("BIO: before: now %d nfsiod's active\n", active); - dprintk("BIO: nfsiod %d waiting\n", current->pid); - interruptible_sleep_on(&req->rq_wait); - - if (current->signal & ~current->blocked) - break; - if (!req->rq_rpcreq.rq_slot) - continue; - dprintk("BIO: nfsiod %d woken up; calling nfs_rpc_doio.\n", - current->pid); - active++; - dprintk("BIO: before: now %d nfsiod's active\n", active); - do { - result = nfs_rpc_doio(req->rq_server, - &req->rq_rpcreq, 1); - } while (!req->rq_callback(result, req)); - active--; - } - - return 0; -} diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 2f12cae9b..732874d7f 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -53,6 +53,7 @@ * DNS domain!). Skip dummy devices for BOOTP. * Jacek Zapala : Fixed a bug which prevented server-ip address * from nfsroot parameter from being used. + * Olaf Kirch : Adapted to new NFS code. 
* */ @@ -79,12 +80,14 @@ #include <linux/net.h> #include <linux/netdevice.h> #include <linux/if_arp.h> -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#ifdef CONFIG_AX25 #include <net/ax25.h> /* For AX25_P_IP */ #endif #include <linux/skbuff.h> +#include <linux/ip.h> #include <linux/socket.h> #include <linux/route.h> +#include <linux/sunrpc/clnt.h> #include <linux/nfs.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> @@ -92,12 +95,14 @@ #include <net/route.h> #include <net/sock.h> +#include <asm/segment.h> #include <asm/uaccess.h> +#define NFSDBG_FACILITY NFSDBG_ROOT /* Range of privileged ports */ -#define STARTPORT 600 -#define ENDPORT 1023 -#define NPORTS (ENDPORT - STARTPORT + 1) +#define STARTPORT 600 +#define ENDPORT 1023 +#define NPORTS (ENDPORT - STARTPORT + 1) /* Define the timeout for waiting for a RARP/BOOTP reply */ @@ -121,10 +126,10 @@ static struct open_dev *open_base = NULL; /* IP configuration */ static struct device *root_dev = NULL; /* Device selected for booting */ static char user_dev_name[IFNAMSIZ]; /* Name of user-selected boot device */ -static struct sockaddr_in myaddr; /* My IP address */ -static struct sockaddr_in server; /* Server IP address */ -static struct sockaddr_in gateway; /* Gateway IP address */ -static struct sockaddr_in netmask; /* Netmask for local subnet */ +static __u32 myaddr; /* My IP address */ +static __u32 servaddr; /* Server IP address */ +static __u32 gateway; /* Gateway IP address */ +static __u32 netmask; /* Netmask for local subnet */ /* BOOTP/RARP variables */ @@ -132,7 +137,7 @@ static int bootp_flag; /* User said: Use BOOTP! */ static int rarp_flag; /* User said: Use RARP! 
*/ static int bootp_dev_count = 0; /* Number of devices allowing BOOTP */ static int rarp_dev_count = 0; /* Number of devices allowing RARP */ -static struct sockaddr_in rarp_serv; /* IP address of RARP server */ +static __u32 rarp_serv; /* IP address of RARP server */ #if defined(CONFIG_RNFS_BOOTP) || defined(CONFIG_RNFS_RARP) #define CONFIG_RNFS_DYNAMIC /* Enable dynamic IP config */ @@ -145,8 +150,9 @@ static volatile int pkt_arrived; /* BOOTP/RARP packet detected */ /* NFS-related data */ static struct nfs_mount_data nfs_data; /* NFS mount info */ -static char nfs_path[NFS_MAXPATHLEN] = ""; /* Name of directory to mount */ -static int nfs_port; /* Port to connect to for NFS */ +static char nfs_path[NFS_MAXPATHLEN]; /* Name of directory to mount */ +static int nfs_port; /* Port to connect to for NFS */ +static int mount_port; /* Mount daemon port number */ /* Yes, we use sys_socket, but there's no include file for it */ @@ -195,9 +201,7 @@ static int root_dev_open(void) bootp_dev_count++; if (!(dev->flags & IFF_NOARP)) rarp_dev_count++; -#ifdef NFSROOT_DEBUG - printk(KERN_NOTICE "Root-NFS: Opened %s\n", dev->name); -#endif + dprintk("Root-NFS: Opened %s\n", dev->name); } } *last = NULL; @@ -209,6 +213,58 @@ static int root_dev_open(void) return 0; } +static inline void +set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port) +{ + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + sin->sin_port = port; +} + +static int +root_dev_chg_route(int op, struct device *dev, __u32 dest, __u32 mask, __u32 gw) +{ + struct rtentry route; + unsigned long oldfs; + int err; + + route.rt_dev = dev->name; + route.rt_mtu = dev->mtu; + route.rt_flags = RTF_UP; + set_sockaddr((struct sockaddr_in *) &route.rt_dst, dest & mask, 0); + set_sockaddr((struct sockaddr_in *) &route.rt_genmask, mask, 0); + + if (gw != 0) { + set_sockaddr((struct sockaddr_in *) &route.rt_gateway, gw, 0); + route.rt_flags |= RTF_GATEWAY; + if ((gw ^ myaddr) & netmask) { + printk(KERN_ERR 
"Root-NFS: Gateway not on local network!\n"); + return -ENETUNREACH; + } + } + + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = ip_rt_ioctl(op, &route); + set_fs(oldfs); + printk(KERN_NOTICE "%s route %s %s %s: res %d\n", + (op == SIOCADDRT? "add" : "del"), + in_ntoa(dest), in_ntoa(mask), in_ntoa(gw), err); + + return err; +} + +static int +root_dev_add_route(struct device *dev, __u32 dest, __u32 mask, __u32 gateway) +{ + return root_dev_chg_route(SIOCADDRT, dev, dest, mask, gateway); +} + +static int +root_dev_del_route(struct device *dev, __u32 dest, __u32 mask, __u32 gateway) +{ + return root_dev_chg_route(SIOCDELRT, dev, dest, mask, gateway); +} /* * Restore the state of all devices. However, keep the root device open @@ -306,7 +362,7 @@ static int root_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet /* If it's not ethernet or AX25, delete it. */ if ((rarp->ar_pro != htons(ETH_P_IP) && dev->type != ARPHRD_AX25) || -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#ifdef CONFIG_AX25 (rarp->ar_pro != htons(AX25_P_IP) && dev->type == ARPHRD_AX25) || #endif rarp->ar_pln != 4) { @@ -330,8 +386,8 @@ static int root_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet } /* Discard packets which are not from specified server. 
*/ if (rarp_flag && !bootp_flag && - rarp_serv.sin_addr.s_addr != INADDR_NONE && - rarp_serv.sin_addr.s_addr != sip) { + rarp_serv != INADDR_NONE && + rarp_serv != sip) { kfree_skb(skb, FREE_READ); return 0; } @@ -350,14 +406,10 @@ static int root_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet sti(); root_dev = dev; - if (myaddr.sin_addr.s_addr == INADDR_NONE) { - myaddr.sin_family = dev->family; - myaddr.sin_addr.s_addr = tip; - } - if (server.sin_addr.s_addr == INADDR_NONE) { - server.sin_family = dev->family; - server.sin_addr.s_addr = sip; - } + if (myaddr == INADDR_NONE) + myaddr = tip; + if (servaddr == INADDR_NONE) + servaddr = sip; kfree_skb(skb, FREE_READ); return 0; } @@ -395,10 +447,8 @@ static void root_rarp_send(void) static struct device *bootp_dev = NULL; /* Device selected as best BOOTP target */ -static int bootp_xmit_fd = -1; /* Socket descriptor for transmit */ -static struct socket *bootp_xmit_sock; /* The socket itself */ -static int bootp_recv_fd = -1; /* Socket descriptor for receive */ -static struct socket *bootp_recv_sock; /* The socket itself */ +static struct socket *bootp_xmit_sock; /* BOOTP send socket */ +static struct socket *bootp_recv_sock; /* BOOTP receive socket */ struct bootp_pkt { /* BOOTP packet format */ u8 op; /* 1=request, 2=reply */ @@ -462,18 +512,8 @@ static inline int root_alloc_bootp(void) */ static int root_add_bootp_route(void) { - struct rtentry route; - - memset(&route, 0, sizeof(route)); - route.rt_dev = bootp_dev->name; - route.rt_mss = bootp_dev->mtu; - route.rt_flags = RTF_UP; - ((struct sockaddr_in *) &(route.rt_dst)) -> sin_addr.s_addr = 0; - ((struct sockaddr_in *) &(route.rt_dst)) -> sin_family = AF_INET; - ((struct sockaddr_in *) &(route.rt_genmask)) -> sin_addr.s_addr = 0; - ((struct sockaddr_in *) &(route.rt_genmask)) -> sin_family = AF_INET; - if (ip_rt_new(&route)) { - printk(KERN_ERR "BOOTP: Adding of route failed!\n"); + if (root_dev_add_route(bootp_dev, 0, 0, 0) < 0) { + 
printk(KERN_ERR "BOOTP: Failed to add route\n"); return -1; } bootp_have_route = 1; @@ -486,14 +526,7 @@ static int root_add_bootp_route(void) */ static int root_del_bootp_route(void) { - struct rtentry route; - - if (!bootp_have_route) - return 0; - memset(&route, 0, sizeof(route)); - ((struct sockaddr_in *) &(route.rt_dst)) -> sin_addr.s_addr = 0; - ((struct sockaddr_in *) &(route.rt_genmask)) -> sin_addr.s_addr = 0; - if (ip_rt_kill(&route)) { + if (bootp_have_route && root_dev_del_route(bootp_dev, 0, 0, 0) < 0) { printk(KERN_ERR "BOOTP: Deleting of route failed!\n"); return -1; } @@ -505,21 +538,13 @@ static int root_del_bootp_route(void) /* * Open UDP socket. */ -static int root_open_udp_sock(int *fd, struct socket **sock) +static int root_open_udp_sock(struct socket **sock) { - struct file *file; - struct inode *inode; - - *fd = sys_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (*fd >= 0) { - file = current->files->fd[*fd]; - inode = file->f_inode; - *sock = &inode->u.socket_i; - return 0; - } + int err; - printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n"); - return -1; + if ((err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0) + printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n"); + return err; } @@ -531,9 +556,7 @@ static int root_connect_udp_sock(struct socket *sock, u32 addr, u16 port) struct sockaddr_in sa; int result; - sa.sin_family = AF_INET; - sa.sin_addr.s_addr = htonl(addr); - sa.sin_port = htons(port); + set_sockaddr(&sa, htonl(addr), htonl(port)); result = sock->ops->connect(sock, (struct sockaddr *) &sa, sizeof(sa), 0); if (result < 0) { printk(KERN_ERR "BOOTP: connect() failed\n"); @@ -551,9 +574,7 @@ static int root_bind_udp_sock(struct socket *sock, u32 addr, u16 port) struct sockaddr_in sa; int result; - sa.sin_family = AF_INET; - sa.sin_addr.s_addr = htonl(addr); - sa.sin_port = htons(port); + set_sockaddr(&sa, htonl(addr), htonl(port)); result = sock->ops->bind(sock, (struct sockaddr *) &sa, sizeof(sa)); if (result < 0) { 
printk(KERN_ERR "BOOTP: bind() failed\n"); @@ -577,12 +598,12 @@ static inline int root_send_udp(struct socket *sock, void *buf, int size) set_fs(get_ds()); iov.iov_base = buf; iov.iov_len = size; - msg.msg_name = NULL; + memset(&msg, 0, sizeof(msg)); msg.msg_iov = &iov; msg.msg_iovlen = 1; - msg.msg_control = NULL; - result = sock->ops->sendmsg(sock, &msg, size, 0, 0); + result = sock_sendmsg(sock, &msg, size); set_fs(oldfs); + return (result != size); } @@ -601,12 +622,11 @@ static inline int root_recv_udp(struct socket *sock, void *buf, int size) set_fs(get_ds()); iov.iov_base = buf; iov.iov_len = size; - msg.msg_name = NULL; + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; msg.msg_iov = &iov; msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_namelen = 0; - result = sock->ops->recvmsg(sock, &msg, size, O_NONBLOCK, 0, &msg.msg_namelen); + result = sock_recvmsg(sock, &msg, size, MSG_DONTWAIT); set_fs(oldfs); return result; } @@ -645,10 +665,10 @@ static void root_bootp_init_ext(u8 *e) */ static void root_bootp_close(void) { - if (bootp_xmit_fd != -1) - sys_close(bootp_xmit_fd); - if (bootp_recv_fd != -1) - sys_close(bootp_recv_fd); + if (bootp_xmit_sock) + sock_release(bootp_xmit_sock); + if (bootp_recv_sock) + sock_release(bootp_recv_sock); root_del_bootp_route(); root_free_bootp(); } @@ -697,6 +717,7 @@ static int root_bootp_open(void) xmit_bootp->hlen = best_dev->addr_len; memcpy(xmit_bootp->hw_addr, best_dev->dev_addr, best_dev->addr_len); root_bootp_init_ext(xmit_bootp->vendor_area); + #ifdef NFSROOT_BOOTP_DEBUG { int x; @@ -716,14 +737,14 @@ static int root_bootp_open(void) return -1; /* Open the sockets */ - if (root_open_udp_sock(&bootp_xmit_fd, &bootp_xmit_sock) || - root_open_udp_sock(&bootp_recv_fd, &bootp_recv_sock)) + if (root_open_udp_sock(&bootp_xmit_sock) || + root_open_udp_sock(&bootp_recv_sock)) return -1; /* Bind/connect the sockets */ - ((struct sock *) bootp_xmit_sock->data) -> broadcast = 1; - ((struct sock *) 
bootp_xmit_sock->data) -> reuse = 1; - ((struct sock *) bootp_recv_sock->data) -> reuse = 1; + bootp_xmit_sock->sk->broadcast = 1; + bootp_xmit_sock->sk->reuse = 1; + bootp_recv_sock->sk->reuse = 1; if (root_bind_udp_sock(bootp_recv_sock, INADDR_ANY, 68) || root_bind_udp_sock(bootp_xmit_sock, INADDR_ANY, 68) || root_connect_udp_sock(bootp_xmit_sock, INADDR_BROADCAST, 67)) @@ -774,12 +795,12 @@ static void root_do_bootp_ext(u8 *ext) switch (*ext++) { case 1: /* Subnet mask */ - if (netmask.sin_addr.s_addr == INADDR_NONE) - memcpy(&netmask.sin_addr.s_addr, ext+1, 4); + if (netmask == INADDR_NONE) + memcpy(&netmask, ext+1, 4); break; case 3: /* Default gateway */ - if (gateway.sin_addr.s_addr == INADDR_NONE) - memcpy(&gateway.sin_addr.s_addr, ext+1, 4); + if (gateway == INADDR_NONE) + memcpy(&gateway, ext+1, 4); break; case 12: /* Host name */ root_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN); @@ -812,9 +833,7 @@ static void root_bootp_recv(void) recv_bootp->htype != xmit_bootp->htype || recv_bootp->hlen != xmit_bootp->hlen || recv_bootp->xid != xmit_bootp->xid) { -#ifdef NFSROOT_BOOTP_DEBUG - printk("?"); -#endif + dprintk("?"); return; } @@ -829,9 +848,9 @@ static void root_bootp_recv(void) root_dev = bootp_dev; /* Extract basic fields */ - myaddr.sin_addr.s_addr = recv_bootp->your_ip; - if (server.sin_addr.s_addr==INADDR_NONE) - server.sin_addr.s_addr = recv_bootp->server_ip; + myaddr = recv_bootp->your_ip; + if (servaddr==INADDR_NONE) + servaddr = recv_bootp->server_ip; /* Parse extensions */ if (recv_bootp->vendor_area[0] == 99 && /* Check magic cookie */ @@ -985,14 +1004,28 @@ static int root_auto_config(void) printk(" OK\n"); printk(KERN_NOTICE "Root-NFS: Got %s answer from %s, ", (pkt_arrived == ARRIVED_BOOTP) ? 
"BOOTP" : "RARP", - in_ntoa(server.sin_addr.s_addr)); - printk("my address is %s\n", in_ntoa(myaddr.sin_addr.s_addr)); + in_ntoa(servaddr)); + printk("my address is %s\n", in_ntoa(myaddr)); return 0; } #endif - +/* Get default netmask - used to be exported from net/ipv4 */ +static unsigned long +ip_get_mask(unsigned long addr) +{ + if (!addr) + return 0; + addr = ntohl(addr); + if (IN_CLASSA(addr)) + return htonl(IN_CLASSA_NET); + if (IN_CLASSB(addr)) + return htonl(IN_CLASSB_NET); + if (IN_CLASSC(addr)) + return htonl(IN_CLASSC_NET); + return 0; +} /*************************************************************************** @@ -1069,13 +1102,13 @@ static int root_nfs_name(char *name) if (octets == 4 && (*cp == ':' || *cp == '\0')) { if (*cp == ':') *cp++ = '\0'; - server.sin_addr.s_addr = in_aton(name); + servaddr = in_aton(name); name = cp; } /* Clear the nfs_data structure and setup the server hostname */ memset(&nfs_data, 0, sizeof(nfs_data)); - strncpy(nfs_data.hostname, in_ntoa(server.sin_addr.s_addr), + strncpy(nfs_data.hostname, in_ntoa(servaddr), sizeof(nfs_data.hostname)-1); /* Set the name of the directory to mount */ @@ -1087,7 +1120,7 @@ static int root_nfs_name(char *name) *options++ = '\0'; if (!strcmp(buf, "default")) strcpy(buf, NFS_ROOT); - cp = in_ntoa(myaddr.sin_addr.s_addr); + cp = in_ntoa(myaddr); if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); return -1; @@ -1098,10 +1131,11 @@ static int root_nfs_name(char *name) /* Set some default values */ nfs_port = -1; - nfs_data.version = NFS_MOUNT_VERSION; - nfs_data.flags = 0; + nfs_data.version = NFS_MNT_VERSION; + nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.bsize = 0; nfs_data.timeo = 7; nfs_data.retrans = 3; nfs_data.acregmin = 3; @@ -1139,17 +1173,17 @@ static int root_nfs_name(char *name) /* * Tell the 
user what's going on. */ -#ifdef NFSROOT_DEBUG +#ifdef NFSROOT_BOOTP static void root_nfs_print(void) { #define IN_NTOA(x) (((x) == INADDR_NONE) ? "none" : in_ntoa(x)) printk(KERN_NOTICE "Root-NFS: IP config: dev=%s, ", root_dev ? root_dev->name : "none"); - printk("local=%s, ", IN_NTOA(myaddr.sin_addr.s_addr)); - printk("server=%s, ", IN_NTOA(server.sin_addr.s_addr)); - printk("gw=%s, ", IN_NTOA(gateway.sin_addr.s_addr)); - printk("mask=%s, ", IN_NTOA(netmask.sin_addr.s_addr)); + printk("local=%s, ", IN_NTOA(myaddr)); + printk("server=%s, ", IN_NTOA(servaddr)); + printk("gw=%s, ", IN_NTOA(gateway)); + printk("mask=%s, ", IN_NTOA(netmask)); printk("host=%s, domain=%s\n", system_utsname.nodename[0] ? system_utsname.nodename : "none", system_utsname.domainname[0] ? system_utsname.domainname : "none"); @@ -1193,10 +1227,7 @@ static void root_nfs_addrs(char *addrs) int num = 0; /* Clear all addresses and strings */ - myaddr.sin_family = server.sin_family = rarp_serv.sin_family = - gateway.sin_family = netmask.sin_family = AF_INET; - myaddr.sin_addr.s_addr = server.sin_addr.s_addr = rarp_serv.sin_addr.s_addr = - gateway.sin_addr.s_addr = netmask.sin_addr.s_addr = INADDR_NONE; + myaddr = servaddr = rarp_serv = gateway = netmask = INADDR_NONE; system_utsname.nodename[0] = '\0'; system_utsname.domainname[0] = '\0'; user_dev_name[0] = '\0'; @@ -1219,26 +1250,24 @@ static void root_nfs_addrs(char *addrs) if ((cp = strchr(ip, ':'))) *cp++ = '\0'; if (strlen(ip) > 0) { -#ifdef NFSROOT_DEBUG - printk(KERN_NOTICE "Root-NFS: Config string num %d is \"%s\"\n", + dprintk("Root-NFS: Config string num %d is \"%s\"\n", num, ip); -#endif switch (num) { case 0: - if ((myaddr.sin_addr.s_addr = in_aton(ip)) == INADDR_ANY) - myaddr.sin_addr.s_addr = INADDR_NONE; + if ((myaddr = in_aton(ip)) == INADDR_ANY) + myaddr = INADDR_NONE; break; case 1: - if ((server.sin_addr.s_addr = in_aton(ip)) == INADDR_ANY) - server.sin_addr.s_addr = INADDR_NONE; + if ((servaddr = in_aton(ip)) == INADDR_ANY) + 
servaddr = INADDR_NONE; break; case 2: - if ((gateway.sin_addr.s_addr = in_aton(ip)) == INADDR_ANY) - gateway.sin_addr.s_addr = INADDR_NONE; + if ((gateway = in_aton(ip)) == INADDR_ANY) + gateway = INADDR_NONE; break; case 3: - if ((netmask.sin_addr.s_addr = in_aton(ip)) == INADDR_ANY) - netmask.sin_addr.s_addr = INADDR_NONE; + if ((netmask = in_aton(ip)) == INADDR_ANY) + netmask = INADDR_NONE; break; case 4: if ((dp = strchr(ip, '.'))) { @@ -1268,7 +1297,7 @@ static void root_nfs_addrs(char *addrs) ip = cp; num++; } - rarp_serv = server; + rarp_serv = servaddr; } @@ -1277,22 +1306,20 @@ static void root_nfs_addrs(char *addrs) */ static int root_nfs_setup(void) { - struct rtentry route; - /* Set the default system name in case none was previously found */ if (!system_utsname.nodename[0]) { - strncpy(system_utsname.nodename, in_ntoa(myaddr.sin_addr.s_addr), __NEW_UTS_LEN); + strncpy(system_utsname.nodename, in_ntoa(myaddr), __NEW_UTS_LEN); system_utsname.nodename[__NEW_UTS_LEN] = '\0'; } /* Set the correct netmask */ - if (netmask.sin_addr.s_addr == INADDR_NONE) - netmask.sin_addr.s_addr = ip_get_mask(myaddr.sin_addr.s_addr); + if (netmask == INADDR_NONE) + netmask = ip_get_mask(myaddr); /* Setup the device correctly */ - root_dev->family = myaddr.sin_family; - root_dev->pa_addr = myaddr.sin_addr.s_addr; - root_dev->pa_mask = netmask.sin_addr.s_addr; + root_dev->family = AF_INET; + root_dev->pa_addr = myaddr; + root_dev->pa_mask = netmask; root_dev->pa_brdaddr = root_dev->pa_addr | ~root_dev->pa_mask; root_dev->pa_dstaddr = 0; @@ -1304,32 +1331,18 @@ static int root_nfs_setup(void) * gatewayed default route. Note that this gives sufficient network * setup even for full system operation in all common cases. 
*/ - memset(&route, 0, sizeof(route)); /* Local subnet route */ - route.rt_dev = root_dev->name; - route.rt_mss = root_dev->mtu; - route.rt_flags = RTF_UP; - *((struct sockaddr_in *) &(route.rt_dst)) = myaddr; - (((struct sockaddr_in *) &(route.rt_dst)))->sin_addr.s_addr &= netmask.sin_addr.s_addr; - *((struct sockaddr_in *) &(route.rt_genmask)) = netmask; - if (ip_rt_new(&route)) { + if (root_dev_add_route(root_dev, myaddr, netmask, 0)) + { printk(KERN_ERR "Root-NFS: Adding of local route failed!\n"); return -1; } - if (gateway.sin_addr.s_addr != INADDR_NONE) { /* Default route */ - (((struct sockaddr_in *) &(route.rt_dst)))->sin_addr.s_addr = INADDR_ANY; - (((struct sockaddr_in *) &(route.rt_genmask)))->sin_addr.s_addr = INADDR_ANY; - *((struct sockaddr_in *) &(route.rt_gateway)) = gateway; - route.rt_flags |= RTF_GATEWAY; - if ((gateway.sin_addr.s_addr ^ myaddr.sin_addr.s_addr) & netmask.sin_addr.s_addr) { - printk(KERN_ERR "Root-NFS: Gateway not on local network!\n"); - return -1; - } - if (ip_rt_new(&route)) { + if (gateway != INADDR_NONE) { /* Default route */ + if (root_dev_add_route(root_dev, INADDR_ANY, INADDR_ANY, gateway)) { printk(KERN_ERR "Root-NFS: Adding of default route failed!\n"); return -1; } - } else if ((server.sin_addr.s_addr ^ myaddr.sin_addr.s_addr) & netmask.sin_addr.s_addr) { + } else if ((servaddr ^ myaddr) & netmask) { printk(KERN_ERR "Root-NFS: Boot server not on local network and no default gateway configured!\n"); return -1; } @@ -1344,6 +1357,10 @@ static int root_nfs_setup(void) */ int nfs_root_init(char *nfsname, char *nfsaddrs) { +#ifdef NFSROOT_DEBUG + nfs_debug |= NFSDBG_ROOT; +#endif + /* * Decode IP addresses and other configuration info contained * in the nfsaddrs string (which came from the kernel command @@ -1369,8 +1386,8 @@ int nfs_root_init(char *nfsname, char *nfsaddrs) * in the (diskless) system and if the server is on another subnet. * If only one interface is installed, the routing is obvious. 
*/ - if ((myaddr.sin_addr.s_addr == INADDR_NONE || - server.sin_addr.s_addr == INADDR_NONE || + if ((myaddr == INADDR_NONE || + servaddr == INADDR_NONE || (open_base != NULL && open_base->next != NULL)) #ifdef CONFIG_RNFS_DYNAMIC && root_auto_config() < 0 @@ -1424,206 +1441,50 @@ int nfs_root_init(char *nfsname, char *nfsaddrs) Routines to actually mount the root directory ***************************************************************************/ - -static struct file *nfs_file; /* File descriptor pointing to inode */ -static struct inode *nfs_sock_inode; /* Inode containing socket */ -static int *rpc_packet = NULL; /* RPC packet */ - - -/* - * Open a UDP socket. - */ -static int root_nfs_open(void) -{ - /* Open the socket */ - if ((nfs_data.fd = sys_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0) { - printk(KERN_ERR "Root-NFS: Cannot open UDP socket for NFS!\n"); - return -1; - } - nfs_file = current->files->fd[nfs_data.fd]; - nfs_sock_inode = nfs_file->f_inode; - return 0; -} - - -/* - * Close the UDP file descriptor. If nfs_read_super is successful, it - * increases the reference count, so we can simply close the file, and - * the socket keeps open. - */ -static void root_nfs_close(void) -{ - /* - * The following close doesn't touch the server structure, which - * now contains a file pointer pointing into nowhere. The system - * _should_ crash as soon as someone tries to select on the root - * filesystem. Haven't tried it yet - we can still change it back - * to the old way of keeping a static copy of all important data - * structures, including their pointers. At least this should be - * checked out _carefully_ before going into a public release - * kernel. 
- GK - */ - sys_close(nfs_data.fd); -} - - -/* - * Find a suitable listening port and bind to it - */ -static int root_nfs_bind(void) -{ - int res = -1; - short port = STARTPORT; - struct sockaddr_in *sin = &myaddr; - int i; - - if (nfs_sock_inode->u.socket_i.ops->bind) { - for (i = 0; i < NPORTS && res < 0; i++) { - sin->sin_port = htons(port++); - if (port > ENDPORT) { - port = STARTPORT; - } - res = nfs_sock_inode->u.socket_i.ops->bind(&nfs_sock_inode->u.socket_i, - (struct sockaddr *)sin, - sizeof(struct sockaddr_in)); - } - } - if (res < 0) { - printk(KERN_ERR "Root-NFS: Cannot find a suitable listening port\n"); - root_nfs_close(); - return -1; - } -#ifdef NFSROOT_DEBUG - printk(KERN_NOTICE "Root-NFS: Binding to listening port %d\n", port); -#endif - return 0; -} - - -/* - * Send an RPC request and wait for the answer - */ -static int *root_nfs_call(int *end) -{ - struct socket *sock; - int dummylen; - static struct nfs_server s = { - 0, /* struct file * */ - 0, /* struct rsock * */ - { - 0, "", - }, /* toaddr */ - 0, /* lock */ - NULL, /* wait queue */ - NFS_MOUNT_SOFT, /* flags */ - 0, 0, /* rsize, wsize */ - 0, /* timeo */ - 0, /* retrans */ - 3 * HZ, 60 * HZ, 30 * HZ, 60 * HZ, "\0" - }; - - s.file = nfs_file; - sock = &((nfs_file->f_inode)->u.socket_i); - - /* Extract the other end of the socket into s->toaddr */ - sock->ops->getname(sock, &(s.toaddr), &dummylen, 1); - ((struct sockaddr_in *) &s.toaddr)->sin_port = server.sin_port; - ((struct sockaddr_in *) &s.toaddr)->sin_family = server.sin_family; - ((struct sockaddr_in *) &s.toaddr)->sin_addr.s_addr = server.sin_addr.s_addr; - - s.rsock = rpc_makesock(nfs_file); - s.flags = nfs_data.flags; - s.rsize = nfs_data.rsize; - s.wsize = nfs_data.wsize; - s.timeo = nfs_data.timeo * HZ / 10; - s.retrans = nfs_data.retrans; - strcpy(s.hostname, nfs_data.hostname); - - /* - * First connect the UDP socket to a server port, then send the - * packet out, and finally check whether the answer is OK. 
- */ - if (nfs_sock_inode->u.socket_i.ops->connect && - nfs_sock_inode->u.socket_i.ops->connect(&nfs_sock_inode->u.socket_i, - (struct sockaddr *) &server, - sizeof(struct sockaddr_in), - nfs_file->f_flags) < 0) - return NULL; - if (nfs_rpc_call(&s, rpc_packet, end, nfs_data.wsize) < 0) - return NULL; - return rpc_verify(rpc_packet); -} - - -/* - * Create an RPC packet header - */ -static int *root_nfs_header(int proc, int program, int version) -{ - gid_t groups[] = { 0 }; - - if (rpc_packet == NULL) { - if (!(rpc_packet = kmalloc(nfs_data.wsize + 1024, GFP_NFS))) { - printk(KERN_ERR "Root-NFS: Cannot allocate UDP buffer\n"); - return NULL; - } - } - return rpc_header(rpc_packet, proc, program, version, 0, 0, 1, groups); -} - - /* * Query server portmapper for the port of a daemon program */ -static int root_nfs_get_port(int program, int version) +static int root_nfs_getport(int program, int version) { - int *p; + struct sockaddr_in sin; - /* Prepare header for portmap request */ - server.sin_port = htons(NFS_PMAP_PORT); - p = root_nfs_header(NFS_PMAP_PROC, NFS_PMAP_PROGRAM, NFS_PMAP_VERSION); - if (!p) - return -1; - - /* Set arguments for portmapper */ - *p++ = htonl(program); - *p++ = htonl(version); - *p++ = htonl(IPPROTO_UDP); - *p++ = 0; - - /* Send request to server portmapper */ - if ((p = root_nfs_call(p)) == NULL) - return -1; - - return ntohl(*p); +printk(KERN_NOTICE "Looking up port of RPC %d/%d on %s\n", + program, version, in_ntoa(servaddr)); + set_sockaddr(&sin, servaddr, 0); + return rpc_getport_external(&sin, program, version, IPPROTO_UDP); } /* * Get portnumbers for mountd and nfsd from server + * The RPC layer does support portmapper queries; the only reason to + * keep this code is that we may want to use fallback ports. But is there + * actually someone who does not run portmap? 
*/ static int root_nfs_ports(void) { - int port; + int port; if (nfs_port < 0) { - if ((port = root_nfs_get_port(NFS_NFS_PROGRAM, NFS_NFS_VERSION)) < 0) { - printk(KERN_ERR "Root-NFS: Unable to get nfsd port number from server, using default\n"); - port = NFS_NFS_PORT; + if ((port = root_nfs_getport(NFS_PROGRAM, NFS_VERSION)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get nfsd port " + "number from server, using default\n"); + port = NFS_PORT; } nfs_port = port; -#ifdef NFSROOT_DEBUG - printk(KERN_NOTICE "Root-NFS: Portmapper on server returned %d as nfsd port\n", port); -#endif + dprintk("Root-NFS: Portmapper on server returned %d " + "as nfsd port\n", port); } - if ((port = root_nfs_get_port(NFS_MOUNT_PROGRAM, NFS_MOUNT_VERSION)) < 0) { - printk(KERN_ERR "Root-NFS: Unable to get mountd port number from server, using default\n"); - port = NFS_MOUNT_PORT; + + if ((port = root_nfs_getport(NFS_MNT_PROGRAM, NFS_MNT_VERSION)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get mountd port " + "number from server, using default\n"); + port = NFS_MNT_PORT; } - server.sin_port = htons(port); -#ifdef NFSROOT_DEBUG - printk(KERN_NOTICE "Root-NFS: Portmapper on server returned %d as mountd port\n", port); -#endif + + mount_port = htons(port); + dprintk("Root-NFS: Portmapper on server returned %d " + "as mountd port\n", port); return 0; } @@ -1635,40 +1496,16 @@ static int root_nfs_ports(void) */ static int root_nfs_get_handle(void) { - int len, status, *p; - - /* Prepare header for mountd request */ - p = root_nfs_header(NFS_MOUNT_PROC, NFS_MOUNT_PROGRAM, NFS_MOUNT_VERSION); - if (!p) { - root_nfs_close(); - return -1; - } + struct sockaddr_in sin; + int status; - /* Set arguments for mountd */ - len = strlen(nfs_path); - *p++ = htonl(len); - memcpy(p, nfs_path, len); - len = (len + 3) >> 2; - p[len] = 0; - p += len; - - /* Send request to server mountd */ - if ((p = root_nfs_call(p)) == NULL) { - root_nfs_close(); - return -1; - } - status = ntohl(*p++); - if (status == 0) 
{ - nfs_data.root = *((struct nfs_fh *) p); - printk(KERN_NOTICE "Root-NFS: Got file handle for %s via RPC\n", nfs_path); - } else { - printk(KERN_ERR "Root-NFS: Server returned error %d while mounting %s\n", - status, nfs_path); - root_nfs_close(); - return -1; - } + set_sockaddr(&sin, servaddr, mount_port); + status = nfs_mount(&sin, nfs_path, &nfs_data.root); + if (status < 0) + printk(KERN_ERR "Root-NFS: Server returned error %d " + "while mounting %s\n", status, nfs_path); - return 0; + return status; } @@ -1677,23 +1514,12 @@ static int root_nfs_get_handle(void) */ static int root_nfs_do_mount(struct super_block *sb) { - /* First connect to the nfsd port on the server */ - server.sin_port = htons(nfs_port); - nfs_data.addr = server; - if (nfs_sock_inode->u.socket_i.ops->connect && - nfs_sock_inode->u.socket_i.ops->connect(&nfs_sock_inode->u.socket_i, - (struct sockaddr *) &server, - sizeof(struct sockaddr_in), - nfs_file->f_flags) < 0) { - root_nfs_close(); - return -1; - } + /* Pass the server address to NFS */ + set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port); /* Now (finally ;-)) read the super block for mounting */ - if (nfs_read_super(sb, &nfs_data, 1) == NULL) { - root_nfs_close(); + if (nfs_read_super(sb, &nfs_data, 1) == NULL) return -1; - } return 0; } @@ -1704,16 +1530,11 @@ static int root_nfs_do_mount(struct super_block *sb) */ int nfs_root_mount(struct super_block *sb) { - if (root_nfs_open() < 0) - return -1; - if (root_nfs_bind() < 0) - return -1; if (root_nfs_ports() < 0) return -1; if (root_nfs_get_handle() < 0) return -1; if (root_nfs_do_mount(sb) < 0) return -1; - root_nfs_close(); return 0; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ff71bd631..714101bb7 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -21,1017 +21,272 @@ * it decodes the packet. * * Feel free to fix it and mail me the diffs if it worries you. 
+ * + * Completely rewritten to support the new RPC call interface; + * rewrote and moved the entire XDR stuff to xdr.c + * --Olaf Kirch June 1996 */ -/* - * Defining NFS_PROC_DEBUG causes a lookup of a file named - * "xyzzy" to toggle debugging. Just cd to an NFS-mounted - * filesystem and type 'ls xyzzy' to turn on debugging. - */ - -#if 0 -#define NFS_PROC_DEBUG -#endif +#define NFS_NEED_XDR_TYPES #include <linux/param.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/malloc.h> -#include <linux/nfs_fs.h> #include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> #include <linux/pagemap.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> -#include <asm/uaccess.h> - -#ifdef NFS_PROC_DEBUG - -static int proc_debug = 0; -#define PRINTK(format, args...) \ - do { \ - if (proc_debug) \ - printk(format , ## args); \ - } while (0) - -#else /* !NFS_PROC_DEBUG */ - -#define PRINTK(format, args...) do ; while (0) - -#endif /* !NFS_PROC_DEBUG */ - -/* Mapping from NFS error code to "errno" error code. */ -#define errno_NFSERR_IO EIO - -static int *nfs_rpc_header(int *p, int procedure, int ruid); -static int *nfs_rpc_verify(int *p); -static int nfs_stat_to_errno(int stat); - -/* - * Our memory allocation and release functions. - */ - -#define NFS_SLACK_SPACE 1024 /* Total overkill */ -/* !!! Be careful, this constant is now also used in sock.c... - We should easily convert to not using it anymore for most cases... */ - -static inline int *nfs_rpc_alloc(int size) -{ - int *i; - - while (!(i = (int *)kmalloc(size+NFS_SLACK_SPACE,GFP_NFS))) { - schedule(); - } - return i; -} - -static inline void nfs_rpc_free(int *p) -{ - kfree((void *)p); -} +#include <asm/segment.h> /* - * Here are a bunch of xdr encode/decode functions that convert - * between machine dependent and xdr data formats. + * If NFS_DEBUG is defined, you can toggle NFS debugging by causing + * a lookup of "xyzzy". 
Just cd to an NFS-mounted filesystem and type + * 'ls xyzzy' to turn on debugging. */ +#ifdef NFS_DEBUG +# define NFSDBG_FACILITY NFSDBG_PROC +#endif -#define QUADLEN(len) (((len) + 3) >> 2) - -static inline int *xdr_encode_fhandle(int *p, struct nfs_fh *fhandle) -{ - *((struct nfs_fh *) p) = *fhandle; - return p + QUADLEN(sizeof(*fhandle)); -} - -static inline int *xdr_decode_fhandle(int *p, struct nfs_fh *fhandle) -{ - *fhandle = *((struct nfs_fh *) p); - return p + QUADLEN(sizeof(*fhandle)); -} - -static inline int *xdr_encode_string(int *p, const char *string) -{ - int len = strlen(string); - int quadlen = QUADLEN(len); - - p[quadlen] = 0; - *p++ = htonl(len); - memcpy(p, string, len); - return p + quadlen; -} - -static inline int *xdr_decode_string(int *p, char *string, unsigned int maxlen) -{ - unsigned int len = ntohl(*p++); - if (len > maxlen) - return NULL; - memcpy(string, p, len); - string[len] = '\0'; - return p + QUADLEN(len); -} - -static inline int *xdr_decode_string2(int *p, char **string, unsigned int *len, - unsigned int maxlen) -{ - *len = ntohl(*p++); - if (*len > maxlen) - return NULL; - *string = (char *) p; - return p + QUADLEN(*len); -} - - -static inline int *xdr_encode_data(int *p, const char *data, int len) -{ - int quadlen = QUADLEN(len); - - p[quadlen] = 0; - *p++ = htonl(len); - copy_from_user(p, data, len); - return p + quadlen; -} - -static inline int *xdr_decode_data(int *p, char *data, int *lenp, int maxlen) -{ - unsigned len = *lenp = ntohl(*p++); - if (len > maxlen) - return NULL; - memcpy(data, p, len); - return p + QUADLEN(len); -} - -static int *xdr_decode_fattr(int *p, struct nfs_fattr *fattr) -{ - fattr->type = (enum nfs_ftype) ntohl(*p++); - fattr->mode = ntohl(*p++); - fattr->nlink = ntohl(*p++); - fattr->uid = ntohl(*p++); - fattr->gid = ntohl(*p++); - fattr->size = ntohl(*p++); - fattr->blocksize = ntohl(*p++); - fattr->rdev = ntohl(*p++); - fattr->blocks = ntohl(*p++); - fattr->fsid = ntohl(*p++); - fattr->fileid = 
ntohl(*p++); - fattr->atime.seconds = ntohl(*p++); - fattr->atime.useconds = ntohl(*p++); - fattr->mtime.seconds = ntohl(*p++); - fattr->mtime.useconds = ntohl(*p++); - fattr->ctime.seconds = ntohl(*p++); - fattr->ctime.useconds = ntohl(*p++); - return p; -} - -static int *xdr_encode_sattr(int *p, struct nfs_sattr *sattr) -{ - *p++ = htonl(sattr->mode); - *p++ = htonl(sattr->uid); - *p++ = htonl(sattr->gid); - *p++ = htonl(sattr->size); - *p++ = htonl(sattr->atime.seconds); - *p++ = htonl(sattr->atime.useconds); - *p++ = htonl(sattr->mtime.seconds); - *p++ = htonl(sattr->mtime.useconds); - return p; -} - -static int *xdr_decode_entry(int *p, struct nfs_entry *entry) -{ - entry->fileid = ntohl(*p++); - if (!(p = xdr_decode_string(p, entry->name, NFS_MAXNAMLEN))) - return NULL; - entry->cookie = ntohl(*p++); - entry->eof = 0; - return p; -} - -static int *xdr_decode_fsinfo(int *p, struct nfs_fsinfo *res) -{ - res->tsize = ntohl(*p++); - res->bsize = ntohl(*p++); - res->blocks = ntohl(*p++); - res->bfree = ntohl(*p++); - res->bavail = ntohl(*p++); - return p; -} /* * One function for each procedure in the NFS protocol. 
*/ - -int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +int +nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) { - int *p, *p0; - int status; - int ruid = 0; + int status; - PRINTK("NFS call getattr\n"); - if (!(p0 = nfs_rpc_alloc(server->rsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_GETATTR, ruid); - p = xdr_encode_fhandle(p, fhandle); - if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - p = xdr_decode_fattr(p, fattr); - PRINTK("NFS reply getattr\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply getattr failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call getattr\n"); + status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); + dprintk("NFS reply getattr\n"); return status; } -int nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_sattr *sattr, struct nfs_fattr *fattr) +int +nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_sattr *sattr, struct nfs_fattr *fattr) { - int *p, *p0; - int status; - int ruid = 0; + struct nfs_sattrargs arg = { fhandle, sattr }; + int status; - PRINTK("NFS call setattr\n"); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_SETATTR, ruid); - p = xdr_encode_fhandle(p, fhandle); - p = xdr_encode_sattr(p, sattr); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - p = xdr_decode_fattr(p, fattr); - PRINTK("NFS reply setattr\n"); - 
/* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply setattr failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call setattr\n"); + status = rpc_call(server->client, NFSPROC_SETATTR, &arg, fattr, 0); + dprintk("NFS reply setattr\n"); return status; } -int nfs_proc_lookup(struct nfs_server *server, struct nfs_fh *dir, const char *name, +int +nfs_proc_lookup(struct nfs_server *server, struct nfs_fh *dir, const char *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - int *p, *p0; - int status; - int ruid = 0; + struct nfs_diropargs arg = { dir, name }; + struct nfs_diropok res = { fhandle, fattr }; + int status; - PRINTK("NFS call lookup %s\n", name); -#ifdef NFS_PROC_DEBUG + dprintk("NFS call lookup %s\n", name); +#ifdef RPC_DEBUG if (!strcmp(name, "xyzzy")) - proc_debug = 1 - proc_debug; + nfs_debug = ~nfs_debug; #endif - if (!(p0 = nfs_rpc_alloc(server->rsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_LOOKUP, ruid); - p = xdr_encode_fhandle(p, dir); - p = xdr_encode_string(p, name); - if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - p = xdr_decode_fhandle(p, fhandle); - p = xdr_decode_fattr(p, fattr); - PRINTK("NFS reply lookup\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply lookup failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + status = rpc_call(server->client, NFSPROC_LOOKUP, &arg, &res, 0); + dprintk("NFS reply lookup: %d\n", status); return status; } -int nfs_proc_readlink(struct nfs_server *server, struct nfs_fh *fhandle, - int **p0, char **string, unsigned int *len, unsigned int maxlen) +int 
+nfs_proc_readlink(struct nfs_server *server, struct nfs_fh *fhandle, + void **p0, char **string, unsigned int *len, + unsigned int maxlen) { - int *p; - int status, ruid = 0; + struct nfs_readlinkres res = { string, len, maxlen, NULL }; + int status; - PRINTK("NFS call readlink\n"); - if (!(*p0 = nfs_rpc_alloc(server->rsize))) - return -EIO; -retry: - p = nfs_rpc_header(*p0, NFSPROC_READLINK, ruid); - p = xdr_encode_fhandle(p, fhandle); - if ((status = nfs_rpc_call(server, *p0, p, server->rsize)) < 0) - return status; - if (!(p = nfs_rpc_verify(*p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - if (!(p = xdr_decode_string2(p, string, len, maxlen))) { - printk("nfs_proc_readlink: giant pathname\n"); - status = -errno_NFSERR_IO; - } - else /* status = 0, */ - PRINTK("NFS reply readlink\n"); - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply readlink failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } + dprintk("NFS call readlink\n"); + status = rpc_call(server->client, NFSPROC_READLINK, fhandle, &res, 0); + dprintk("NFS reply readlink: %d\n", status); + if (!status) + *p0 = res.buffer; + else if (res.buffer) + kfree(res.buffer); return status; } -int nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle, - int offset, int count, char *data, struct nfs_fattr *fattr) -{ - int *p, *p0; - int status; - int ruid = 0; - int len; - - PRINTK("NFS call read %d @ %d\n", count, offset); - if (!(p0 = nfs_rpc_alloc(server->rsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_READ, ruid); - p = xdr_encode_fhandle(p, fhandle); - *p++ = htonl(offset); - *p++ = htonl(count); - *p++ = htonl(count); /* traditional, could be any value */ - if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == 
NFS_OK) { - p = xdr_decode_fattr(p, fattr); - if (!(p = xdr_decode_data(p, data, &len, count))) { - printk("nfs_proc_read: giant data size\n"); - status = -errno_NFSERR_IO; - } - else { - status = len; - PRINTK("NFS reply read %d\n", len); - } - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply read failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); +int +nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle, int swap, + unsigned long offset, unsigned int count, + void *buffer, struct nfs_fattr *fattr) +{ + struct nfs_readargs arg = { fhandle, offset, count, buffer }; + struct nfs_readres res = { fattr, count }; + int status; + + dprintk("NFS call read %d @ %ld\n", count, offset); + status = rpc_call(server->client, NFSPROC_READ, &arg, &res, + swap? NFS_RPC_SWAPFLAGS : 0); + dprintk("NFS reply read: %d\n", status); return status; } int -nfs_proc_read_request(struct rpc_ioreq *req, struct nfs_server *server, - struct nfs_fh *fh, unsigned long offset, - unsigned long count, __u32 *buf) +nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap, + unsigned long offset, unsigned int count, + const void *buffer, struct nfs_fattr *fattr) { - __u32 *p, *p0; - int len; + struct nfs_writeargs arg = { fhandle, offset, count, buffer }; + int status; - PRINTK("NFS reqst read %ld @ %ld\n", count, offset); - if (!(p0 = nfs_rpc_alloc(NFS_SLACK_SPACE))) - return -EIO; - - p = nfs_rpc_header(p0, NFSPROC_READ, 0); - p = xdr_encode_fhandle(p, fh); - *p++ = htonl(offset); - *p++ = htonl(count); - *p++ = htonl(count); /* traditional, could be any value */ - req->rq_svec[0].iov_base = p0; - req->rq_svec[0].iov_len = (p - p0) << 2; - req->rq_slen = (p - p0) << 2; - req->rq_snr = 1; - - len = (6 + 1 + 17 + 1); /* standard READ reply header */ - req->rq_rvec[0].iov_base = p0; - req->rq_rvec[0].iov_len = len << 2; - req->rq_rvec[1].iov_base = buf; - 
req->rq_rvec[1].iov_len = count; - req->rq_rvec[2].iov_base = p0 + len; /* spill buffer */ - req->rq_rvec[2].iov_len = (NFS_SLACK_SPACE - len) << 2; - req->rq_rlen = count + NFS_SLACK_SPACE; - req->rq_rnr = 3; - - req->rq_addr = &server->toaddr; - req->rq_alen = sizeof(server->toaddr); - - return rpc_transmit(server->rsock, req); + dprintk("NFS call write %d @ %ld\n", count, offset); + status = rpc_call(server->client, NFSPROC_WRITE, &arg, fattr, + swap? (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) : 0); + dprintk("NFS reply read: %d\n", status); + return status < 0? status : count; } int -nfs_proc_read_reply(struct rpc_ioreq *req, struct nfs_fattr *fattr) +nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir, + const char *name, struct nfs_sattr *sattr, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - int status; - __u32 *p0, *p; - int count; - - p0 = (__u32 *) req->rq_rvec[0].iov_base; + struct nfs_createargs arg = { dir, name, sattr }; + struct nfs_diropok res = { fhandle, fattr }; + int status; - if (!(p = nfs_rpc_verify(p0))) { - /* Tell the upper layers to retry */ - status = -EAGAIN; - /* status = -errno_NFSERR_IO; */ - } else if ((status = ntohl(*p++)) == NFS_OK) { - p = xdr_decode_fattr(p, fattr); - count = ntohl(*p++); - if (p != req->rq_rvec[2].iov_base) { - /* unexpected RPC reply header size. punt. - * fixme: move iovec contents to align data - * on page boundary and adjust RPC header size - * guess. 
*/ - status = -errno_NFSERR_IO; - PRINTK("NFS reply read odd header size %d\n", - (p - p0) << 2); - } else { - status = count; - PRINTK("NFS reply read %d\n", count); - } - } - else { - PRINTK("NFS reply read failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); - return status; -} - -int nfs_proc_write(struct inode * inode, int offset, - int count, const char *data, struct nfs_fattr *fattr) -{ - int *p, *p0; - int status; - int ruid = 0; - void * kdata; /* address of kernel copy */ - struct nfs_server * server = NFS_SERVER(inode); - struct nfs_fh *fhandle = NFS_FH(inode); - - PRINTK("NFS call write %d @ %d\n", count, offset); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_WRITE, ruid); - p = xdr_encode_fhandle(p, fhandle); - *p++ = htonl(offset); /* traditional, could be any value */ - *p++ = htonl(offset); - *p++ = htonl(count); /* traditional, could be any value */ - kdata = (void *) (p+1); /* start of data in RPC buffer */ - p = xdr_encode_data(p, data, count); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - update_vm_cache(inode, offset, kdata, count); - p = xdr_decode_fattr(p, fattr); - PRINTK("NFS reply write\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply write failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call create %s\n", name); + status = rpc_call(server->client, NFSPROC_CREATE, &arg, &res, 0); + dprintk("NFS reply create: %d\n", status); return status; } -int nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir, - const char *name, struct nfs_sattr *sattr, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) -{ - int *p, *p0; - int status; 
- int ruid = 0; - - PRINTK("NFS call create %s\n", name); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_CREATE, ruid); - p = xdr_encode_fhandle(p, dir); - p = xdr_encode_string(p, name); - p = xdr_encode_sattr(p, sattr); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - p = xdr_decode_fhandle(p, fhandle); - p = xdr_decode_fattr(p, fattr); - PRINTK("NFS reply create\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply create failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); - return status; -} - -int nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir, const char *name) -{ - int *p, *p0; - int status; - int ruid = 0; - - PRINTK("NFS call remove %s\n", name); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_REMOVE, ruid); - p = xdr_encode_fhandle(p, dir); - p = xdr_encode_string(p, name); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - PRINTK("NFS reply remove\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply remove failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); - return status; -} - -int nfs_proc_rename(struct nfs_server *server, - struct nfs_fh *old_dir, const char *old_name, - struct nfs_fh *new_dir, const char *new_name, - int must_be_dir) -{ - int *p, *p0; - int status; - int ruid = 0; - - /* - * Disallow "rename()" with trailing slashes over NFS: 
getting - * POSIX.1 behaviour is just too unlikely. - */ - if (must_be_dir) - return -EINVAL; - PRINTK("NFS call rename %s -> %s\n", old_name, new_name); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_RENAME, ruid); - p = xdr_encode_fhandle(p, old_dir); - p = xdr_encode_string(p, old_name); - p = xdr_encode_fhandle(p, new_dir); - p = xdr_encode_string(p, new_name); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - PRINTK("NFS reply rename\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply rename failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); - return status; -} - -int nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fh *dir, const char *name) +int +nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir, const char *name) { - int *p, *p0; - int status; - int ruid = 0; + struct nfs_diropargs arg = { dir, name }; + int status; - PRINTK("NFS call link %s\n", name); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_LINK, ruid); - p = xdr_encode_fhandle(p, fhandle); - p = xdr_encode_fhandle(p, dir); - p = xdr_encode_string(p, name); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - PRINTK("NFS reply link\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply link failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call remove 
%s\n", name); + status = rpc_call(server->client, NFSPROC_REMOVE, &arg, NULL, 0); + dprintk("NFS reply remove: %d\n", status); return status; } -int nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir, - const char *name, const char *path, struct nfs_sattr *sattr) +int +nfs_proc_rename(struct nfs_server *server, + struct nfs_fh *old_dir, const char *old_name, + struct nfs_fh *new_dir, const char *new_name) { - int *p, *p0; - int status; - int ruid = 0; + struct nfs_renameargs arg = { old_dir, old_name, new_dir, new_name }; + int status; - PRINTK("NFS call symlink %s -> %s\n", name, path); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_SYMLINK, ruid); - p = xdr_encode_fhandle(p, dir); - p = xdr_encode_string(p, name); - p = xdr_encode_string(p, path); - p = xdr_encode_sattr(p, sattr); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - PRINTK("NFS reply symlink\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply symlink failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call rename %s -> %s\n", old_name, new_name); + status = rpc_call(server->client, NFSPROC_RENAME, &arg, NULL, 0); + dprintk("NFS reply rename: %d\n", status); return status; } -int nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir, - const char *name, struct nfs_sattr *sattr, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +int +nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fh *dir, const char *name) { - int *p, *p0; - int status; - int ruid = 0; + struct nfs_linkargs arg = { fhandle, dir, name }; + int status; - PRINTK("NFS call mkdir %s\n", name); - if (!(p0 = 
nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_MKDIR, ruid); - p = xdr_encode_fhandle(p, dir); - p = xdr_encode_string(p, name); - p = xdr_encode_sattr(p, sattr); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - p = xdr_decode_fhandle(p, fhandle); - p = xdr_decode_fattr(p, fattr); - PRINTK("NFS reply mkdir\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply mkdir failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call link %s\n", name); + status = rpc_call(server->client, NFSPROC_LINK, &arg, NULL, 0); + dprintk("NFS reply link: %d\n", status); return status; } -int nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name) +int +nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir, + const char *name, const char *path, + struct nfs_sattr *sattr) { - int *p, *p0; - int status; - int ruid = 0; + struct nfs_symlinkargs arg = { dir, name, path, sattr }; + int status; - PRINTK("NFS call rmdir %s\n", name); - if (!(p0 = nfs_rpc_alloc(server->wsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_RMDIR, ruid); - p = xdr_encode_fhandle(p, dir); - p = xdr_encode_string(p, name); - if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - PRINTK("NFS reply rmdir\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply rmdir failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call symlink %s 
-> %s\n", name, path); + status = rpc_call(server->client, NFSPROC_SYMLINK, &arg, NULL, 0); + dprintk("NFS reply symlink: %d\n", status); return status; } -int nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle, - int cookie, int count, struct nfs_entry *entry) +int +nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir, + const char *name, struct nfs_sattr *sattr, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - int *p, *p0; - int status; - int ruid = 0; - int i; - int size; - int eof; + struct nfs_createargs arg = { dir, name, sattr }; + struct nfs_diropok res = { fhandle, fattr }; + int status; - PRINTK("NFS call readdir %d @ %d\n", count, cookie); - size = server->rsize; - if (!(p0 = nfs_rpc_alloc(server->rsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_READDIR, ruid); - p = xdr_encode_fhandle(p, fhandle); - *p++ = htonl(cookie); - *p++ = htonl(size); - if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - for (i = 0; i < count && *p++; i++) { - if (!(p = xdr_decode_entry(p, entry++))) - break; - } - if (!p) { - printk("nfs_proc_readdir: giant filename\n"); - status = -errno_NFSERR_IO; - } - else { - eof = (i == count && !*p++ && *p++) - || (i < count && *p++); - if (eof && i) - entry[-1].eof = 1; - PRINTK("NFS reply readdir %d %s\n", i, - eof ? 
"eof" : ""); - status = i; - } - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply readdir failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call mkdir %s\n", name); + status = rpc_call(server->client, NFSPROC_MKDIR, &arg, &res, 0); + dprintk("NFS reply mkdir: %d\n", status); return status; } -int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *res) +int +nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name) { - int *p, *p0; - int status; - int ruid = 0; + struct nfs_diropargs arg = { dir, name }; + int status; - PRINTK("NFS call statfs\n"); - if (!(p0 = nfs_rpc_alloc(server->rsize))) - return -EIO; -retry: - p = nfs_rpc_header(p0, NFSPROC_STATFS, ruid); - p = xdr_encode_fhandle(p, fhandle); - if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { - nfs_rpc_free(p0); - return status; - } - if (!(p = nfs_rpc_verify(p0))) - status = -errno_NFSERR_IO; - else if ((status = ntohl(*p++)) == NFS_OK) { - p = xdr_decode_fsinfo(p, res); - PRINTK("NFS reply statfs\n"); - /* status = 0; */ - } - else { - if (!ruid && current->fsuid == 0 && current->uid != 0) { - ruid = 1; - goto retry; - } - PRINTK("NFS reply statfs failed = %d\n", status); - status = -nfs_stat_to_errno(status); - } - nfs_rpc_free(p0); + dprintk("NFS call rmdir %s\n", name); + status = rpc_call(server->client, NFSPROC_RMDIR, &arg, NULL, 0); + dprintk("NFS reply rmdir: %d\n", status); return status; } /* - * Here are a few RPC-assist functions. + * The READDIR implementation is somewhat hackish - we pass a temporary + * buffer to the encode function, which installs it in the receive + * iovec. The dirent buffer itself is passed in the result struct. 
*/ - -int *rpc_header(int *p, int procedure, int program, int version, - int uid, int gid, - int ngroup, gid_t *groups) -{ - int *p1; - static int xid = 0; - unsigned char *sys = (unsigned char *) system_utsname.nodename; - - if (xid == 0) { - xid = CURRENT_TIME; - xid ^= (sys[3]<<24) | (sys[2]<<16) | (sys[1]<<8) | sys[0]; - } - *p++ = htonl(++xid); - *p++ = htonl(RPC_CALL); - *p++ = htonl(RPC_VERSION); - *p++ = htonl(program); - *p++ = htonl(version); - *p++ = htonl(procedure); - *p++ = htonl(RPC_AUTH_UNIX); - p1 = p++; - *p++ = htonl(CURRENT_TIME); /* traditional, could be anything */ - p = xdr_encode_string(p, (char *) sys); - *p++ = htonl(uid); - *p++ = htonl(gid); - if (ngroup > 16) - ngroup = 16; - *p++ = htonl(ngroup); - while (ngroup) { - *p++ = htonl(*groups); - groups++; - ngroup--; - } - *p1 = htonl((p - (p1 + 1)) << 2); - *p++ = htonl(RPC_AUTH_NULL); - *p++ = htonl(0); - return p; -} - - -static int *nfs_rpc_header(int *p, int procedure, int ruid) -{ - return rpc_header(p, procedure, NFS_PROGRAM, NFS_VERSION, - (ruid ? 
current->uid : current->fsuid), - current->egid, current->ngroups, current->groups); -} - - -int *rpc_verify(int *p) +int +nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle, + u32 cookie, unsigned int size, struct nfs_entry *entry) { - unsigned int n; + struct nfs_readdirargs arg; + struct nfs_readdirres res; + void * buffer; + int status; - p++; - if ((n = ntohl(*p++)) != RPC_REPLY) { - printk("nfs_rpc_verify: not an RPC reply: %x\n", n); - return NULL; - } - if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { - printk("nfs_rpc_verify: RPC call rejected: %d\n", n); - return NULL; - } - switch (n = ntohl(*p++)) { - case RPC_AUTH_NULL: case RPC_AUTH_UNIX: case RPC_AUTH_SHORT: - break; - default: - printk("nfs_rpc_verify: bad RPC authentication type: %d\n", n); - return NULL; - } - if ((n = ntohl(*p++)) > 400) { - printk("nfs_rpc_verify: giant auth size\n"); - return NULL; - } - p += QUADLEN(n); - if ((n = ntohl(*p++)) != RPC_SUCCESS) { - printk("nfs_rpc_verify: RPC call failed: %d\n", n); - return NULL; + /* First get a temp buffer for the readdir reply */ + while (!(buffer = (void *) get_free_page(GFP_USER))) { + need_resched = 1; + schedule(); + if (signalled()) + return -ERESTARTSYS; } - return p; -} + arg.fh = fhandle; + arg.cookie = cookie; + arg.buffer = buffer; + arg.bufsiz = server->rsize < PAGE_SIZE? server->rsize : PAGE_SIZE; + res.buffer = entry; + res.bufsiz = size; -static int *nfs_rpc_verify(int *p) -{ - return rpc_verify(p); + dprintk("NFS call readdir %d\n", cookie); + status = rpc_call(server->client, NFSPROC_READDIR, &arg, &res, 0); + dprintk("NFS reply readdir: %d\n", status); + free_page((unsigned long) buffer); + return status; } - -/* - * We need to translate between nfs status return values and - * the local errno values which may not be the same. 
- */ - -static struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS_OK, 0 }, - { NFSERR_PERM, EPERM }, - { NFSERR_NOENT, ENOENT }, - { NFSERR_IO, errno_NFSERR_IO }, - { NFSERR_NXIO, ENXIO }, - { NFSERR_EAGAIN, EAGAIN }, - { NFSERR_ACCES, EACCES }, - { NFSERR_EXIST, EEXIST }, - { NFSERR_NODEV, ENODEV }, - { NFSERR_NOTDIR, ENOTDIR }, - { NFSERR_ISDIR, EISDIR }, - { NFSERR_INVAL, EINVAL }, - { NFSERR_FBIG, EFBIG }, - { NFSERR_NOSPC, ENOSPC }, - { NFSERR_ROFS, EROFS }, - { NFSERR_NAMETOOLONG, ENAMETOOLONG }, - { NFSERR_NOTEMPTY, ENOTEMPTY }, - { NFSERR_DQUOT, EDQUOT }, - { NFSERR_STALE, ESTALE }, -#ifdef EWFLUSH - { NFSERR_WFLUSH, EWFLUSH }, -#endif - { -1, EIO } -}; - -static int nfs_stat_to_errno(int stat) +int +nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) { - int i; + int status; - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == stat) - return nfs_errtbl[i].errno; - } - printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); - return nfs_errtbl[i].errno; + dprintk("NFS call statfs\n"); + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0); + dprintk("NFS reply statfs: %d\n", status); + return status; } - diff --git a/fs/nfs/read.c b/fs/nfs/read.c new file mode 100644 index 000000000..cf7c5ece7 --- /dev/null +++ b/fs/nfs/read.c @@ -0,0 +1,232 @@ +/* + * linux/fs/nfs/read.c + * + * Block I/O for NFS + * + * Partial copy of Linus' read cache modifications to fs/nfs/file.c + * modified for async RPC by okir@monad.swb.de + * + * We do an ugly hack here in order to return proper error codes to the + * user program when a read request failed: since generic_file_read + * only checks the return value of inode->i_op->readpage() which is always 0 + * for async RPC, we set the error bit of the page to 1 when an error occurs, + * and make nfs_readpage transmit requests synchronously when encountering this. 
+ * This is only a small problem, though, since we now retry all operations + * within the RPC code when root squashing is suspected. + */ + +#define NFS_NEED_XDR_TYPES +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> + +#include <asm/segment.h> +#include <asm/system.h> + +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + +struct nfs_rreq { + struct inode * ra_inode; /* inode from which to read */ + struct page * ra_page; /* page to be read */ + struct nfs_readargs ra_args; /* XDR argument struct */ + struct nfs_readres ra_res; /* ... and result struct */ + struct nfs_fattr ra_fattr; /* fattr storage */ +}; + +/* Hack for future NFS swap support */ +#ifndef IS_SWAPFILE +# define IS_SWAPFILE(inode) (0) +#endif + + +/* + * Set up the NFS read request struct + */ +static inline void +nfs_readreq_setup(struct nfs_rreq *req, struct nfs_fh *fh, + unsigned long offset, void *buffer, unsigned int rsize) +{ + req->ra_args.fh = fh; + req->ra_args.offset = offset; + req->ra_args.count = rsize; + req->ra_args.buffer = buffer; + req->ra_res.fattr = &req->ra_fattr; + req->ra_res.count = rsize; +} + + +/* + * Read a page synchronously. + */ +int +nfs_readpage_sync(struct inode *inode, struct page *page) +{ + struct nfs_rreq rqst; + unsigned long offset = page->offset; + char *buffer = (char *) page_address(page); + int rsize = NFS_SERVER(inode)->rsize; + int result, refresh = 0; + int count = PAGE_SIZE; + int flags = IS_SWAPFILE(inode)? 
NFS_RPC_SWAPFLAGS : 0; + + dprintk("NFS: nfs_readpage_sync(%p)\n", page); + clear_bit(PG_error, &page->flags); + + do { + if (count < rsize) + rsize = count; + + dprintk("NFS: nfs_proc_read(%s, (%x,%lx), %ld, %d, %p)\n", + NFS_SERVER(inode)->hostname, inode->i_dev, + inode->i_ino, offset, rsize, buffer); + + /* Set up arguments and perform rpc call */ + nfs_readreq_setup(&rqst, NFS_FH(inode), offset, buffer, rsize); + result = rpc_call(NFS_CLIENT(inode), NFSPROC_READ, + &rqst.ra_args, &rqst.ra_res, flags); + + /* + * Even if we had a partial success we can't mark the page + * cache valid. + */ + if (result < 0) { + if (result == -EISDIR) + result = -EINVAL; + goto io_error; + } + refresh = 1; + count -= result; + offset += result; + buffer += result; + if (result < rsize) /* NFSv2ism */ + break; + } while (count); + + memset(buffer, 0, count); + set_bit(PG_uptodate, &page->flags); + result = 0; + +io_error: + if (refresh) + nfs_refresh_inode(inode, &rqst.ra_fattr); + clear_bit(PG_locked, &page->flags); + wake_up(&page->wait); + return result; +} + +/* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). 
+ */ +static void +nfs_readpage_result(struct rpc_task *task) +{ + struct nfs_rreq *req = (struct nfs_rreq *) task->tk_calldata; + struct page *page = req->ra_page; + int result = task->tk_status; + static int succ = 0, fail = 0; + + dprintk("NFS: %4d received callback for page %lx, result %d\n", + task->tk_pid, page_address(page), result); + + if (result >= 0) { + result = req->ra_res.count; + if (result < PAGE_SIZE) { + memset((char *) page_address(page) + result, 0, + PAGE_SIZE - result); + } + nfs_refresh_inode(req->ra_inode, &req->ra_fattr); + set_bit(PG_uptodate, &page->flags); + succ++; + } else { + set_bit(PG_error, &page->flags); + fail++; + dprintk("NFS: %d successful reads, %d failures\n", succ, fail); + } + iput(req->ra_inode); + clear_bit(PG_locked, &page->flags); + wake_up(&page->wait); + + free_page(page_address(page)); + + rpc_release_task(task); + kfree(req); +} + +static inline int +nfs_readpage_async(struct inode *inode, struct page *page) +{ + struct nfs_rreq *req; + int result, flags; + + dprintk("NFS: nfs_readpage_async(%p)\n", page); + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + + if (NFS_CONGESTED(inode) + || !(req = (struct nfs_rreq *) rpc_allocate(flags, sizeof(*req)))) { + dprintk("NFS: deferring async READ request.\n"); + return -1; + } + + /* Initialize request */ + nfs_readreq_setup(req, NFS_FH(inode), page->offset, + (void *) page_address(page), PAGE_SIZE); + req->ra_inode = inode; + req->ra_page = page; + + /* Start the async call */ + dprintk("NFS: executing async READ request.\n"); + result = rpc_do_call(NFS_CLIENT(inode), NFSPROC_READ, + &req->ra_args, &req->ra_res, flags, + nfs_readpage_result, req); + + if (result >= 0) { + inode->i_count++; + atomic_inc(&page->count); + return 0; + } + + dprintk("NFS: failed to enqueue async READ request.\n"); + kfree(req); + return -1; +} + +/* + * Read a page over NFS. + * We read the page synchronously in the following cases: + * - The file is a swap file. 
Swap-ins are always sync operations, + * so there's no need bothering to make async reads 100% fail-safe. + * - The NFS rsize is smaller than PAGE_SIZE. We could kludge our way + * around this by creating several consecutive read requests, but + * that's hardly worth it. + * - The error flag is set for this page. This happens only when a + * previous async read operation failed. + * - The server is congested. + */ +int +nfs_readpage(struct inode *inode, struct page *page) +{ + unsigned long address; + int error = -1; + + dprintk("NFS: nfs_readpage %08lx\n", page_address(page)); + set_bit(PG_locked, &page->flags); + address = page_address(page); + atomic_inc(&page->count); + if (!IS_SWAPFILE(inode) && !PageError(page) + && NFS_SERVER(inode)->rsize >= PAGE_SIZE) + error = nfs_readpage_async(inode, page); + if (error < 0) /* couldn't enqueue */ + error = nfs_readpage_sync(inode, page); + if (error < 0 && IS_SWAPFILE(inode)) + printk("Aiee.. nfs swap-in of page failed!\n"); + free_page(address); + return error; +} diff --git a/fs/nfs/rpcsock.c b/fs/nfs/rpcsock.c deleted file mode 100644 index f627237d5..000000000 --- a/fs/nfs/rpcsock.c +++ /dev/null @@ -1,581 +0,0 @@ -/* - * linux/fs/nfs/rpcsock.c - * - * This is a generic RPC call interface for datagram sockets that is able - * to place several concurrent RPC requests at the same time. It works like - * this: - * - * - When a process places a call, it allocates a request slot if - * one is available. Otherwise, it sleeps on the backlog queue - * (rpc_reserve). - * - Then, the message is transmitted via rpc_send (exported by name of - * rpc_transmit). - * - Finally, the process waits for the call to complete (rpc_doio): - * The first process on the receive queue waits for the next RPC packet, - * and peeks at the XID. If it finds a matching request, it receives - * the datagram on behalf of that process and wakes it up. Otherwise, - * the datagram is discarded. 
- * - If the process having received the datagram was the first one on - * the receive queue, it wakes up the next one to listen for replies. - * - It then removes itself from the request queue (rpc_release). - * If there are more callers waiting on the backlog queue, they are - * woken up, too. - * - * Mar 1996: - * - Split up large functions into smaller chunks as per Linus' coding - * style. Found an interesting bug this way, too. - * - Added entry points for nfsiod. - * - * Copyright (C) 1995, 1996, Olaf Kirch <okir@monad.swb.de> - */ - -#include <linux/types.h> -#include <linux/malloc.h> -#include <linux/sched.h> -#include <linux/nfs_fs.h> -#include <linux/errno.h> -#include <linux/socket.h> -#include <linux/fcntl.h> -#include <linux/in.h> -#include <linux/net.h> -#include <linux/mm.h> -#include <linux/rpcsock.h> - -#include <linux/udp.h> -#include <net/sock.h> - -#include <asm/uaccess.h> - -#define msleep(sec) { current->timeout = sec * HZ / 1000; \ - current->state = TASK_INTERRUPTIBLE; \ - schedule(); \ - } - -#undef DEBUG_RPC -#ifdef DEBUG_RPC -#define dprintk(args...) printk(## args) -#else -#define dprintk(args...) -#endif - - -/* - * Insert new request into wait list. We make sure list is sorted by - * increasing timeout value. 
- */ -static inline void -rpc_insque(struct rpc_sock *rsock, struct rpc_wait *slot) -{ - struct rpc_wait *next = rsock->pending; - - slot->w_next = next; - slot->w_prev = NULL; - if (next) - next->w_prev = slot; - rsock->pending = slot; - slot->w_queued = 1; - - dprintk("RPC: inserted %p into queue\n", slot); -} - -/* - * Remove request from request queue - */ -static inline void -rpc_remque(struct rpc_sock *rsock, struct rpc_wait *slot) -{ - struct rpc_wait *prev = slot->w_prev, - *next = slot->w_next; - - if (prev != NULL) - prev->w_next = next; - else - rsock->pending = next; - if (next != NULL) - next->w_prev = prev; - - slot->w_queued = 0; - dprintk("RPC: removed %p from queue, head now %p.\n", - slot, rsock->pending); -} - -/* - * Write data to socket. - */ -static inline int -rpc_sendmsg(struct rpc_sock *rsock, struct iovec *iov, int nr, int len, - struct sockaddr *sap, int salen) -{ - struct socket *sock = rsock->sock; - struct msghdr msg; - unsigned long oldfs; - int result; - - msg.msg_iov = iov; - msg.msg_iovlen = nr; - msg.msg_name = sap; - msg.msg_namelen = salen; - msg.msg_control = NULL; - - oldfs = get_fs(); - set_fs(get_ds()); - result = sock->ops->sendmsg(sock, &msg, len, 0, 0); - set_fs(oldfs); - - dprintk("RPC: rpc_sendmsg(iov %p, len %d) = %d\n", iov, len, result); - return result; -} -/* - * Read data from socket - */ -static inline int -rpc_recvmsg(struct rpc_sock *rsock, struct iovec *iov, - int nr, int len, int flags) -{ - struct socket *sock = rsock->sock; - struct sockaddr sa; - struct msghdr msg; - unsigned long oldfs; - int result, alen; - - msg.msg_iov = iov; - msg.msg_iovlen = nr; - msg.msg_name = &sa; - msg.msg_namelen = sizeof(sa); - msg.msg_control = NULL; - - oldfs = get_fs(); - set_fs(get_ds()); - result = sock->ops->recvmsg(sock, &msg, len, 1, flags, &alen); - set_fs(oldfs); - - dprintk("RPC: rpc_recvmsg(iov %p, len %d) = %d\n", iov, len, result); - return result; -} - -/* - * This code is slightly complicated. 
Since the networking code does not - * honor the current->timeout value, we have to select on the socket. - */ -static inline int -rpc_select(struct rpc_sock *rsock) -{ - struct select_table_entry entry; - struct file *file = rsock->file; - select_table wait_table; - - dprintk("RPC: selecting on socket...\n"); - wait_table.nr = 0; - wait_table.entry = &entry; - current->state = TASK_INTERRUPTIBLE; - if (!file->f_op->select(file->f_inode, file, SEL_IN, &wait_table) - && !file->f_op->select(file->f_inode, file, SEL_IN, NULL)) { - schedule(); - remove_wait_queue(entry.wait_address, &entry.wait); - current->state = TASK_RUNNING; - if (current->signal & ~current->blocked) - return -ERESTARTSYS; - if (current->timeout == 0) - return -ETIMEDOUT; - } else if (wait_table.nr) - remove_wait_queue(entry.wait_address, &entry.wait); - current->state = TASK_RUNNING; - dprintk("RPC: ...Okay, there appears to be some data.\n"); - return 0; -} - -/* - * Reserve an RPC call slot. nocwait determines whether we wait in case - * of congestion or not. 
- */ -int -rpc_reserve(struct rpc_sock *rsock, struct rpc_ioreq *req, int nocwait) -{ - struct rpc_wait *slot; - - req->rq_slot = NULL; - - while (!(slot = rsock->free) || rsock->cong >= rsock->cwnd) { - if (nocwait) { - current->timeout = 0; - return -ENOBUFS; - } - dprintk("RPC: rpc_reserve waiting on backlog\n"); - interruptible_sleep_on(&rsock->backlog); - if (current->timeout == 0) - return -ETIMEDOUT; - if (current->signal & ~current->blocked) - return -ERESTARTSYS; - if (rsock->shutdown) - return -EIO; - } - - rsock->free = slot->w_next; - rsock->cong += RPC_CWNDSCALE; /* bump congestion value */ - - slot->w_queued = 0; - slot->w_gotit = 0; - slot->w_req = req; - - dprintk("RPC: reserved slot %p\n", slot); - req->rq_slot = slot; - return 0; -} - -/* - * Release an RPC call slot - */ -void -rpc_release(struct rpc_sock *rsock, struct rpc_ioreq *req) -{ - struct rpc_wait *slot = req->rq_slot; - - if (slot != NULL) { - dprintk("RPC: release slot %p\n", slot); - - /* Wake up the next receiver */ - if (slot == rsock->pending && slot->w_next != NULL) - wake_up(&slot->w_next->w_wait); - - /* remove slot from queue of pending */ - if (slot->w_queued) - rpc_remque(rsock, slot); - slot->w_next = rsock->free; - rsock->free = slot; - - /* decrease congestion value */ - rsock->cong -= RPC_CWNDSCALE; - if (rsock->cong < rsock->cwnd && rsock->backlog) - wake_up(&rsock->backlog); - if (rsock->shutdown) - wake_up(&rsock->shutwait); - - req->rq_slot = NULL; - } -} - -/* - * Adjust RPC congestion window - */ -static void -rpc_cwnd_adjust(struct rpc_sock *rsock, int timeout) -{ - unsigned long cwnd = rsock->cwnd; - - if (!timeout) { - if (rsock->cong >= cwnd) { - /* The (cwnd >> 1) term makes sure - * the result gets rounded properly. 
*/ - cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + - (cwnd >> 1)) / cwnd; - if (cwnd > RPC_MAXCWND) - cwnd = RPC_MAXCWND; - } - } else { - if ((cwnd >>= 1) < RPC_CWNDSCALE) - cwnd = RPC_CWNDSCALE; - dprintk("RPC: cwnd decrease %08lx\n", cwnd); - } - dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx\n", - rsock->cong, rsock->cwnd, cwnd); - - rsock->cwnd = cwnd; -} - -static inline void -rpc_send_check(char *where, u32 *ptr) -{ - if (ptr[1] != htonl(RPC_CALL) || ptr[2] != htonl(RPC_VERSION)) { - printk("RPC: %s sending evil packet:\n" - " %08x %08x %08x %08x %08x %08x %08x %08x\n", - where, - ptr[0], ptr[1], ptr[2], ptr[3], - ptr[4], ptr[5], ptr[6], ptr[7]); - } -} - -/* - * Place the actual RPC call. - * We have to copy the iovec because sendmsg fiddles with its contents. - */ -static inline int -rpc_send(struct rpc_sock *rsock, struct rpc_wait *slot) -{ - struct rpc_ioreq *req = slot->w_req; - struct iovec iov[UIO_MAXIOV]; - - if (rsock->shutdown) - return -EIO; - - memcpy(iov, req->rq_svec, req->rq_snr * sizeof(iov[0])); - slot->w_xid = *(u32 *)(iov[0].iov_base); - if (!slot->w_queued) - rpc_insque(rsock, slot); - - dprintk("rpc_send(%p, %x)\n", slot, slot->w_xid); - rpc_send_check("rpc_send", (u32 *) req->rq_svec[0].iov_base); - return rpc_sendmsg(rsock, iov, req->rq_snr, req->rq_slen, - req->rq_addr, req->rq_alen); -} - -/* - * This is the same as rpc_send but for the functions exported to nfsiod - */ -int -rpc_transmit(struct rpc_sock *rsock, struct rpc_ioreq *req) -{ - rpc_send_check("rpc_transmit", (u32 *) req->rq_svec[0].iov_base); - return rpc_send(rsock, req->rq_slot); -} - -/* - * Receive and dispatch a single reply - */ -static inline int -rpc_grok(struct rpc_sock *rsock) -{ - struct rpc_wait *rovr; - struct rpc_ioreq *req; - struct iovec iov[UIO_MAXIOV]; - u32 xid; - int safe, result; - - iov[0].iov_base = (void *) &xid; - iov[0].iov_len = sizeof(xid); - result = rpc_recvmsg(rsock, iov, 1, sizeof(xid), MSG_PEEK); - - if (result < 0) { - switch (-result) { 
- case EAGAIN: case ECONNREFUSED: - return 0; - case ERESTARTSYS: - return result; - default: - dprintk("rpc_grok: recv error = %d\n", result); - } - } - if (result < 4) { - printk(KERN_WARNING "RPC: impossible RPC reply size %d\n", - result); - return 0; - } - - dprintk("RPC: rpc_grok: got xid %08lx\n", (unsigned long) xid); - - /* Look for the caller */ - safe = 0; - for (rovr = rsock->pending; rovr; rovr = rovr->w_next) { - if (rovr->w_xid == xid) - break; - if (safe++ > RPC_MAXREQS) { - printk(KERN_WARNING "RPC: loop in request Q!!\n"); - rovr = NULL; - break; - } - } - - if (!rovr || rovr->w_gotit) { - /* discard dgram */ - dprintk("RPC: rpc_grok: %s.\n", - rovr? "duplicate reply" : "bad XID"); - iov[0].iov_base = (void *) &xid; - iov[0].iov_len = sizeof(xid); - rpc_recvmsg(rsock, iov, 1, sizeof(xid), 0); - return 0; - } - req = rovr->w_req; - - /* Now receive the reply... Copy the iovec first because of - * memcpy_fromiovec fiddling. */ - memcpy(iov, req->rq_rvec, req->rq_rnr * sizeof(iov[0])); - result = rpc_recvmsg(rsock, iov, req->rq_rnr, req->rq_rlen, 0); - rovr->w_result = result; - rovr->w_gotit = 1; - - /* ... and wake up the process */ - wake_up(&rovr->w_wait); - - return result; -} - -/* - * Wait for the reply to our call. 
- */ -static int -rpc_recv(struct rpc_sock *rsock, struct rpc_wait *slot) -{ - int result; - - do { - /* If we are not the receiver, wait on the sidelines */ - dprintk("RPC: rpc_recv TP1\n"); - while (rsock->pending != slot) { - if (!slot->w_gotit) - interruptible_sleep_on(&slot->w_wait); - if (slot->w_gotit) - return slot->w_result; /* quite important */ - if (current->signal & ~current->blocked) - return -ERESTARTSYS; - if (rsock->shutdown) - return -EIO; - if (current->timeout == 0) - return -ETIMEDOUT; - } - - /* Wait for data to arrive */ - if ((result = rpc_select(rsock)) < 0) { - dprintk("RPC: select error = %d\n", result); - return result; - } - - /* Receive and dispatch */ - if ((result = rpc_grok(rsock)) < 0) - return result; - } while (current->timeout && !slot->w_gotit); - - return slot->w_gotit? slot->w_result : -ETIMEDOUT; -} - -/* - * Generic RPC call routine. This handles retries and timeouts etc pp. - * - * If sent is non-null, it assumes the called has already sent out the - * message, so it won't need to do so unless a timeout occurs. - */ -int -rpc_doio(struct rpc_sock *rsock, struct rpc_ioreq *req, - struct rpc_timeout *strategy, int sent) -{ - struct rpc_wait *slot; - int result, retries; - unsigned long timeout; - - timeout = strategy->to_initval; - retries = 0; - slot = req->rq_slot; - - do { - dprintk("RPC: rpc_doio: TP1 (req %p)\n", req); - current->timeout = jiffies + timeout; - if (slot == NULL) { - result = rpc_reserve(rsock, req, 0); - if (result == -ETIMEDOUT) - goto timedout; - if (result < 0) - break; - slot = req->rq_slot; - rpc_send_check("rpc_doio", - (u32 *) req->rq_svec[0].iov_base); - rpc_insque(rsock, slot); - } - - /* This check is for loopback NFS. Sometimes replies come - * in before biod has called rpc_doio... 
*/ - if (slot->w_gotit) { - result = slot->w_result; - break; - } - - dprintk("RPC: rpc_doio: TP2\n"); - if (sent || (result = rpc_send(rsock, slot)) >= 0) { - result = rpc_recv(rsock, slot); - sent = 0; - } - - if (result != -ETIMEDOUT) { - /* dprintk("RPC: rpc_recv returned %d\n", result); */ - rpc_cwnd_adjust(rsock, 0); - break; - } - - rpc_cwnd_adjust(rsock, 1); - -timedout: - dprintk("RPC: rpc_recv returned timeout.\n"); - if (strategy->to_exponential) - timeout <<= 1; - else - timeout += strategy->to_increment; - if (strategy->to_maxval && timeout >= strategy->to_maxval) - timeout = strategy->to_maxval; - if (strategy->to_retries && ++retries >= strategy->to_retries) - break; - } while (1); - - dprintk("RPC: rpc_doio: TP3\n"); - current->timeout = 0; - return result; -} - -/* - */ -int -rpc_call(struct rpc_sock *rsock, struct rpc_ioreq *req, - struct rpc_timeout *strategy) -{ - int result; - - result = rpc_doio(rsock, req, strategy, 0); - if (req->rq_slot == NULL) - printk(KERN_WARNING "RPC: bad: rq_slot == NULL\n"); - rpc_release(rsock, req); - return result; -} - -struct rpc_sock * -rpc_makesock(struct file *file) -{ - struct rpc_sock *rsock; - struct socket *sock; - struct sock *sk; - struct rpc_wait *slot; - int i; - - dprintk("RPC: make RPC socket...\n"); - sock = &file->f_inode->u.socket_i; - if (sock->type != SOCK_DGRAM || sock->ops->family != AF_INET) { - printk(KERN_WARNING "RPC: only UDP sockets supported\n"); - return NULL; - } - sk = (struct sock *) sock->data; - - if ((rsock = kmalloc(sizeof(struct rpc_sock), GFP_KERNEL)) == NULL) - return NULL; - memset(rsock, 0, sizeof(*rsock)); /* Nnnngh! 
*/ - - rsock->sock = sock; - rsock->inet = sk; - rsock->file = file; - rsock->cwnd = RPC_INITCWND; - - dprintk("RPC: slots %p, %p, ...\n", rsock->waiting, rsock->waiting + 1); - rsock->free = rsock->waiting; - for (i = 0, slot = rsock->waiting; i < RPC_MAXREQS-1; i++, slot++) - slot->w_next = slot + 1; - slot->w_next = NULL; - - dprintk("RPC: made socket %p\n", rsock); - return rsock; -} - -int -rpc_closesock(struct rpc_sock *rsock) -{ - unsigned long t0 = jiffies; - - rsock->shutdown = 1; - while (rsock->pending || waitqueue_active(&rsock->backlog)) { - interruptible_sleep_on(&rsock->shutwait); - if (current->signal & ~current->blocked) - return -EINTR; -#if 1 - if (t0 && t0 - jiffies > 60 * HZ) { - printk(KERN_WARNING "RPC: hanging in rpc_closesock.\n"); - t0 = 0; - } -#endif - } - - kfree(rsock); - return 0; -} diff --git a/fs/nfs/sock.c b/fs/nfs/sock.c deleted file mode 100644 index 6191cfeb3..000000000 --- a/fs/nfs/sock.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * linux/fs/nfs/sock.c - * - * Copyright (C) 1992, 1993 Rick Sladkey - * - * low-level nfs remote procedure call interface - * - * FIXES - * - * 2/7/94 James Bottomley and Jon Peatfield DAMTP, Cambridge University - * - * An xid mismatch no longer causes the request to be trashed. - * - * Peter Eriksson - incorrect XID used to confuse Linux - * Florian La Roche - use the correct max size, if reading a packet and - * also verify, if the whole packet has been read... - * more checks should be done in proc.c... - * - */ - -#include <linux/sched.h> -#include <linux/nfs_fs.h> -#include <linux/errno.h> -#include <linux/socket.h> -#include <linux/fcntl.h> -#include <linux/in.h> -#include <linux/net.h> -#include <linux/mm.h> -#include <linux/rpcsock.h> - -#include <asm/uaccess.h> - -#define _S(nr) (1<<((nr)-1)) - -/* - * Place a synchronous call to the NFS server, meaning that the process - * sleeps in rpc_call until it either receives a reply or a major timeout - * occurs. 
- * This is now merely a front-end to nfs_rpc_doio. - */ -int -nfs_rpc_call(struct nfs_server *server, int *start, int *end, int size) -{ - struct rpc_ioreq req; - - size += 1024; /* account for NFS slack space. ugly */ - - req.rq_addr = &server->toaddr; - req.rq_alen = sizeof(server->toaddr); - req.rq_slot = NULL; - - req.rq_svec[0].iov_base = start; - req.rq_svec[0].iov_len = (end - start) << 2; - req.rq_slen = (end - start) << 2; - req.rq_snr = 1; - req.rq_rvec[0].iov_base = start; - req.rq_rvec[0].iov_len = size; - req.rq_rlen = size; - req.rq_rnr = 1; - - return nfs_rpc_doio(server, &req, 0); -} - -int -nfs_rpc_doio(struct nfs_server *server, struct rpc_ioreq *req, int async) -{ - struct rpc_timeout timeout; - unsigned long maxtimeo; - unsigned long oldmask; - int major_timeout_seen, result; - - timeout.to_initval = server->timeo; - timeout.to_maxval = NFS_MAX_RPC_TIMEOUT*HZ/10; - timeout.to_retries = server->retrans; - timeout.to_exponential = 1; - - oldmask = current->blocked; - current->blocked |= ~(_S(SIGKILL) - | ((server->flags & NFS_MOUNT_INTR) - ? ((current->sig->action[SIGINT - 1].sa_handler == SIG_DFL - ? _S(SIGINT) : 0) - | (current->sig->action[SIGQUIT - 1].sa_handler == SIG_DFL - ? 
_S(SIGQUIT) : 0)) - : 0)); - - major_timeout_seen = 0; - maxtimeo = timeout.to_maxval; - - do { - result = rpc_doio(server->rsock, req, &timeout, async); - rpc_release(server->rsock, req); /* Release slot */ - - if (current->signal & ~current->blocked) - result = -ERESTARTSYS; - if (result == -ETIMEDOUT) { - if (async) - break; - if (server->flags & NFS_MOUNT_SOFT) { - printk("NFS server %s not responding, " - "timed out.\n", server->hostname); - result = -EIO; - break; - } - if (!major_timeout_seen) { -#if defined(CONFIG_SGISEEQ) && 1 - extern void sgiseeq_dump_rings(void); - extern void sgiseeq_my_reset(void); -#endif - printk("NFS server %s not responding, " - "still trying.\n", server->hostname); - major_timeout_seen = 1; -#if defined(CONFIG_SGISEEQ) && 1 - sgiseeq_dump_rings(); - sgiseeq_my_reset(); -#endif - } - if ((timeout.to_initval <<= 1) >= maxtimeo) { - timeout.to_initval = maxtimeo; - } - } else if (result < 0 && result != -ERESTARTSYS) { - printk("NFS: notice message: result = %d.\n", result); - } - } while (result == -ETIMEDOUT && !(server->flags & NFS_MOUNT_SOFT)); - - if (result >= 0 && major_timeout_seen) - printk("NFS server %s OK.\n", server->hostname); - /* 20 is the minimum RPC reply header size */ - if (result >= 0 && result < 20) { - printk("NFS: too small read memory size (%d bytes)\n", result); - result = -EIO; - } - - current->blocked = oldmask; - return result; -} diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index e628e0182..7ea2d6f99 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -48,9 +48,10 @@ struct inode_operations nfs_symlink_inode_operations = { static int nfs_follow_link(struct inode *dir, struct inode *inode, int flag, int mode, struct inode **res_inode) { - int error, *mem; + int error; unsigned int len; char *res, *res2; + void *mem; *res_inode = NULL; if (!dir) { @@ -95,9 +96,12 @@ static int nfs_follow_link(struct inode *dir, struct inode *inode, static int nfs_readlink(struct inode *inode, char *buffer, int 
buflen) { - int error, *mem; + int error; unsigned int len; char *res; + void *mem; + + dfprintk(VFS, "nfs: readlink(%x/%ld)\n", inode->i_dev, inode->i_ino); if (!S_ISLNK(inode->i_mode)) { iput(inode); diff --git a/fs/nfs/write.c b/fs/nfs/write.c new file mode 100644 index 000000000..f48df4571 --- /dev/null +++ b/fs/nfs/write.c @@ -0,0 +1,823 @@ +/* + * linux/fs/nfs/write.c + * + * Writing file data over NFS. + * + * We do it like this: When a (user) process wishes to write data to an + * NFS file, a write request is allocated that contains the RPC task data + * plus some info on the page to be written, and added to the inode's + * write chain. If the process writes past the end of the page, an async + * RPC call to write the page is scheduled immediately; otherwise, the call + * is delayed for a few seconds. + * + * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE. + * + * Write requests are kept on the inode's writeback list. Each entry in + * that list references the page (portion) to be written. When the + * cache timeout has expired, the RPC task is woken up, and tries to + * lock the page. As soon as it manages to do so, the request is moved + * from the writeback list to the writelock list. + * + * Note: we must make sure never to confuse the inode passed in the + * write_page request with the one in page->inode. As far as I understant + * it, these are different when doing a swap-out. + * + * To understand everything that goes one here and in the nfs read code, + * one should be aware that a page is locked in exactly one of the following + * cases: + * + * - A write request is in progress. 
+ * - A user process is in generic_file_write/nfs_update_page + * - A user process is in generic_file_read + * + * Also note that because of the way pages are invalidated in + * nfs_revalidate_inode, the following assertions hold: + * + * - If a page is dirty, there will be no read requests (a page will + * not be re-read unless invalidated by nfs_revalidate_inode). + * - If the page is not uptodate, there will be no pending write + * requests, and no process will be in nfs_update_page. + * + * FIXME: Interaction with the vmscan routines is not optimal yet. + * Either vmscan must be made nfs-savvy, or we need a different page + * reclaim concept that supports something like FS-independent + * buffer_heads with a b_ops-> field. + * + * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> + */ + +#define NFS_NEED_XDR_TYPES +#include <linux/config.h> +#include <linux/types.h> +#include <linux/malloc.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <asm/uaccess.h> + +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + +static void nfs_wback_lock(struct rpc_task *task); +static void nfs_wback_result(struct rpc_task *task); + +/* + * This struct describes a file region to be written. + * It's kind of a pity we have to keep all these lists ourselves, rather + * than sticking an extra pointer into struct page. 
+ */ +struct nfs_wreq { + struct rpc_listitem wb_list; /* linked list of req's */ + struct rpc_task wb_task; /* RPC task */ + struct inode * wb_inode; /* inode referenced */ + struct page * wb_page; /* page to be written */ + unsigned int wb_offset; /* offset within page */ + unsigned int wb_bytes; /* dirty range */ + pid_t wb_pid; /* owner process */ + unsigned short wb_flags; /* status flags */ + + struct nfs_writeargs * wb_args; /* NFS RPC stuff */ + struct nfs_fattr * wb_fattr; /* file attributes */ +}; +#define wb_status wb_task.tk_status + +#define WB_NEXT(req) ((struct nfs_wreq *) ((req)->wb_list.next)) + +/* + * Various flags for wb_flags + */ +#define NFS_WRITE_WANTLOCK 0x0001 /* needs to lock page */ +#define NFS_WRITE_LOCKED 0x0002 /* holds lock on page */ +#define NFS_WRITE_CANCELLED 0x0004 /* has been cancelled */ +#define NFS_WRITE_UNCOMMITTED 0x0008 /* written but uncommitted (NFSv3) */ +#define NFS_WRITE_INVALIDATE 0x0010 /* invalidate after write */ +#define NFS_WRITE_INPROGRESS 0x0020 /* RPC call in progress */ + +#define WB_INPROGRESS(req) ((req)->wb_flags & NFS_WRITE_INPROGRESS) +#define WB_WANTLOCK(req) ((req)->wb_flags & NFS_WRITE_WANTLOCK) +#define WB_HAVELOCK(req) ((req)->wb_flags & NFS_WRITE_LOCKED) +#define WB_CANCELLED(req) ((req)->wb_flags & NFS_WRITE_CANCELLED) +#define WB_UNCOMMITTED(req) ((req)->wb_flags & NFS_WRITE_UNCOMMITTED) +#define WB_INVALIDATE(req) ((req)->wb_flags & NFS_WRITE_INVALIDATE) + +/* + * Cache parameters + */ +#define NFS_WRITEBACK_DELAY (10 * HZ) +#define NFS_WRITEBACK_MAX 64 + +/* + * Limit number of delayed writes + */ +static int nr_write_requests = 0; +static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain"); +struct nfs_wreq * nfs_failed_requests = NULL; + +/* Hack for future NFS swap support */ +#ifndef IS_SWAPFILE +# define IS_SWAPFILE(inode) (0) +#endif + +/* + * Unlock a page after writing it + */ +static inline void +nfs_unlock_page(struct page *page) +{ + dprintk("NFS: unlock %ld\n", 
page->offset); + clear_bit(PG_locked, &page->flags); + wake_up(&page->wait); + +#ifdef CONFIG_NFS_SWAP + /* async swap-out support */ + if (clear_bit(PG_decr_after, &page->flags)) + atomic_dec(&page->count); + if (clear_bit(PG_swap_unlock_after, &page->flags)) + swap_after_unlock_page(page->swap_unlock_entry); +#endif +} + +/* + * Transfer a page lock to a write request waiting for it. + */ +static inline void +transfer_page_lock(struct nfs_wreq *req) +{ + dprintk("NFS: transfer_page_lock\n"); + + req->wb_flags &= ~NFS_WRITE_WANTLOCK; + req->wb_flags |= NFS_WRITE_LOCKED; + rpc_wake_up_task(&req->wb_task); + + dprintk("nfs: wake up task %d (flags %x)\n", + req->wb_task.tk_pid, req->wb_flags); +} + +/* + * Write a page synchronously. + * Offset is the data offset within the page. + */ +static int +nfs_writepage_sync(struct inode *inode, struct page *page, + unsigned long offset, unsigned int count) +{ + struct nfs_fattr fattr; + unsigned int wsize = NFS_SERVER(inode)->wsize; + int result, refresh = 0, written = 0; + u8 *buffer; + + dprintk("NFS: nfs_writepage_sync(%x/%ld %d@%ld)\n", + inode->i_dev, inode->i_ino, + count, page->offset + offset); + + buffer = (u8 *) page_address(page) + offset; + offset += page->offset; + + do { + if (count < wsize && !IS_SWAPFILE(inode)) + wsize = count; + + result = nfs_proc_write(NFS_SERVER(inode), NFS_FH(inode), + offset, wsize, IS_SWAPFILE(inode), + buffer, &fattr); + + if (result < 0) { + /* Must mark the page invalid after I/O error */ + clear_bit(PG_uptodate, &page->flags); + goto io_error; + } + refresh = 1; + buffer += wsize; + offset += wsize; + written += wsize; + count -= wsize; + } while (count); + +io_error: + if (refresh) { + /* See comments in nfs_wback_result */ + if (fattr.size < inode->i_size) + fattr.size = inode->i_size; + /* Solaris 2.5 server seems to send garbled + * fattrs occasionally */ + if (inode->i_ino == fattr.fileid) + nfs_refresh_inode(inode, &fattr); + } + + nfs_unlock_page(page); + return written? 
written : result; +} + +/* + * Append a writeback request to a list + */ +static inline void +append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq) +{ + dprintk("NFS: append_write_request(%p, %p)\n", q, wreq); + rpc_append_list(q, wreq); +} + +/* + * Remove a writeback request from a list + */ +static inline void +remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq) +{ + dprintk("NFS: remove_write_request(%p, %p)\n", q, wreq); + rpc_remove_list(q, wreq); +} + +/* + * Find a write request for a given page + */ +static inline struct nfs_wreq * +find_write_request(struct inode *inode, struct page *page) +{ + struct nfs_wreq *head, *req; + + dprintk("NFS: find_write_request(%x/%ld, %p)\n", + inode->i_dev, inode->i_ino, page); + if (!(req = head = NFS_WRITEBACK(inode))) + return NULL; + do { + if (req->wb_page == page) + return req; + } while ((req = WB_NEXT(req)) != head); + return NULL; +} + +/* + * Find a failed write request by pid + */ +static inline struct nfs_wreq * +find_failed_request(struct inode *inode, pid_t pid) +{ + struct nfs_wreq *head, *req; + + if (!(req = head = nfs_failed_requests)) + return NULL; + do { + if (req->wb_inode == inode && req->wb_pid == pid) + return req; + } while ((req = WB_NEXT(req)) != head); + return NULL; +} + +/* + * Try to merge adjacent write requests. This works only for requests + * issued by the same user. + */ +static inline int +update_write_request(struct nfs_wreq *req, unsigned first, unsigned bytes) +{ + unsigned rqfirst = req->wb_offset, + rqlast = rqfirst + req->wb_bytes, + last = first + bytes; + + dprintk("nfs: trying to update write request %p\n", req); + + /* Check the credentials associated with this write request. + * If the buffer is owned by the same user, we can happily + * add our data without risking server permission problems. + * Note that I'm not messing around with RPC root override creds + * here, because they're used by swap requests only which + * always write out full pages. 
*/ + if (!rpcauth_matchcred(&req->wb_task, req->wb_task.tk_cred)) { + dprintk("NFS: update failed (cred mismatch)\n"); + return 0; + } + + if (first < rqfirst) + rqfirst = first; + if (rqlast < last) + rqlast = last; + req->wb_offset = rqfirst; + req->wb_bytes = rqlast - rqfirst; + + return 1; +} + +/* + * Create and initialize a writeback request + */ +static inline struct nfs_wreq * +create_write_request(struct inode *inode, struct page *page, + unsigned offset, unsigned bytes) +{ + struct nfs_wreq *wreq; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct rpc_task *task; + + dprintk("NFS: create_write_request(%x/%ld, %ld+%d)\n", + inode->i_dev, inode->i_ino, + page->offset + offset, bytes); + + /* FIXME: Enforce hard limit on number of concurrent writes? */ + + wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_USER); + if (!wreq) + return NULL; + memset(wreq, 0, sizeof(*wreq)); + + task = &wreq->wb_task; + rpc_init_task(task, clnt, nfs_wback_result, 0); + task->tk_calldata = wreq; + task->tk_action = nfs_wback_lock; + + rpcauth_lookupcred(task); /* Obtain user creds */ + if (task->tk_status < 0) { + rpc_release_task(task); + kfree(wreq); + return NULL; + } + + /* Put the task on inode's writeback request list. */ + wreq->wb_inode = inode; + wreq->wb_pid = current->pid; + wreq->wb_page = page; + wreq->wb_offset = offset; + wreq->wb_bytes = bytes; + inode->i_count++; + atomic_inc(&page->count); + + append_write_request(&NFS_WRITEBACK(inode), wreq); + + if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4) + rpc_wake_up_next(&write_queue); + + return wreq; +} + +/* + * Schedule a writeback RPC call. + * If the server is congested, don't add to our backlog of queued + * requests but call it synchronously. + * The function returns true if the page has been unlocked as the + * consequence of a synchronous write call. + * + * FIXME: Here we could walk the inode's lock list to see whether the + * page we're currently writing to has been write-locked by the caller. 
+ * If it is, we could schedule an async write request with a long + * delay in order to avoid writing back the page until the lock is + * released. + */ +static inline int +schedule_write_request(struct nfs_wreq *req, int sync) +{ + struct rpc_task *task = &req->wb_task; + struct inode *inode = req->wb_inode; + + if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX) + sync = 1; + + if (sync) { + dprintk("NFS: %4d schedule_write_request (sync)\n", + task->tk_pid); + /* Page is already locked */ + req->wb_flags |= NFS_WRITE_LOCKED; + rpc_execute(task); + } else { + dprintk("NFS: %4d schedule_write_request (async)\n", + task->tk_pid); + task->tk_flags |= RPC_TASK_ASYNC; + task->tk_timeout = NFS_WRITEBACK_DELAY; + rpc_sleep_on(&write_queue, task, NULL, NULL); + } + + return sync == 0; +} + +/* + * Wait for request to complete + * This is almost a copy of __wait_on_page + */ +static inline int +wait_on_write_request(struct nfs_wreq *req) +{ + struct wait_queue wait = { current, NULL }; + struct page *page = req->wb_page; + + add_wait_queue(&page->wait, &wait); + atomic_inc(&page->count); +repeat: + current->state = TASK_INTERRUPTIBLE; + if (PageLocked(page)) { + schedule(); + goto repeat; + } + remove_wait_queue(&page->wait, &wait); + current->state = TASK_RUNNING; + atomic_dec(&page->count); + return signalled()? -ERESTARTSYS : 0; +} + +/* + * Write a page to the server. This will be used for NFS swapping only + * (for now), and we currently do this synchronously only. + */ +int +nfs_writepage(struct inode *inode, struct page *page) +{ + return nfs_writepage_sync(inode, page, 0, PAGE_SIZE); +} + +/* + * Update and possibly write a cached page of an NFS file. + * The page is already locked when we get here. + * + * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad + * things with a page scheduled for an RPC call (e.g. invalidate it). 
+ */ +int +nfs_updatepage(struct inode *inode, struct page *page, const char *buffer, + unsigned long offset, unsigned int count, int sync) +{ + struct nfs_wreq *req; + int status = 0, page_locked = 1; + u8 *page_addr; + + dprintk("NFS: nfs_updatepage(%x/%ld %d@%ld, sync=%d)\n", + inode->i_dev, inode->i_ino, + count, page->offset+offset, sync); + + page_addr = (u8 *) page_address(page); + + /* If wsize is smaller than page size, update and write + * page synchronously. + */ + if (NFS_SERVER(inode)->wsize < PAGE_SIZE) { + copy_from_user(page_addr + offset, buffer, count); + return nfs_writepage_sync(inode, page, offset, count); + } + + /* + * Try to find a corresponding request on the writeback queue. + * If there is one, we can be sure that this request is not + * yet being processed, because we hold a lock on the page. + * + * If the request was created by us, update it. Otherwise, + * transfer the page lock and flush out the dirty page now. + * After returning, generic_file_write will wait on the + * page and retry the update. + */ + if ((req = find_write_request(inode, page)) != NULL) { + if (update_write_request(req, offset, count)) { + copy_from_user(page_addr + offset, buffer, count); + goto updated; + } + dprintk("NFS: wake up conflicting write request.\n"); + transfer_page_lock(req); + return 0; + } + + /* Create the write request. */ + if (!(req = create_write_request(inode, page, offset, count))) { + status = -ENOBUFS; + goto done; + } + + /* Copy data to page buffer. */ + copy_from_user(page_addr + offset, buffer, count); + + /* Schedule request */ + page_locked = schedule_write_request(req, sync); + +updated: + /* + * If we wrote up to the end of the chunk, transmit request now. + * We should be a bit more intelligent about detecting whether a + * process accesses the file sequentially or not. 
+ */ + if (page_locked && (offset + count >= PAGE_SIZE || sync)) + req->wb_flags |= NFS_WRITE_WANTLOCK; + + /* If the page was written synchronously, return any error that + * may have happened; otherwise return the write count. */ + if (page_locked || (status = nfs_write_error(inode)) >= 0) + status = count; + +done: + /* Unlock page and wake up anyone sleeping on it */ + if (page_locked) { + if (req && WB_WANTLOCK(req)) { + transfer_page_lock(req); + /* rpc_execute(&req->wb_task); */ + if (sync) { + wait_on_write_request(req); + if ((count = nfs_write_error(inode)) < 0) + status = count; + } + } else + nfs_unlock_page(page); + } + + dprintk("NFS: nfs_updatepage returns %d (isize %ld)\n", + status, inode->i_size); + return status; +} + +/* + * Flush out a dirty page. + */ +static inline void +nfs_flush_request(struct nfs_wreq *req) +{ + struct page *page = req->wb_page; + + dprintk("NFS: nfs_flush_request(%x/%ld, @%ld)\n", + page->inode->i_dev, page->inode->i_ino, + page->offset); + + req->wb_flags |= NFS_WRITE_WANTLOCK; + if (!set_bit(PG_locked, &page->flags)) { + transfer_page_lock(req); + } else { + printk(KERN_WARNING "NFS oops in %s: can't lock page!\n", + __FUNCTION__); + rpc_wake_up_task(&req->wb_task); + } +} + +/* + * Flush writeback requests. See nfs_flush_dirty_pages for details. + */ +static struct nfs_wreq * +nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len, + int invalidate) +{ + struct nfs_wreq *head, *req, *last = NULL; + off_t rqoffset, rqend, end; + + end = len? 
offset + len : 0x7fffffffUL; + + req = head = NFS_WRITEBACK(inode); + while (req != NULL) { + dprintk("NFS: %4d nfs_flush inspect %x/%ld @%ld fl %x\n", + req->wb_task.tk_pid, + req->wb_inode->i_dev, req->wb_inode->i_ino, + req->wb_page->offset, req->wb_flags); + if (!WB_INPROGRESS(req)) { + rqoffset = req->wb_page->offset + req->wb_offset; + rqend = rqoffset + req->wb_bytes; + + if (rqoffset < end && offset < rqend + && (pid == 0 || req->wb_pid == pid)) { + if (!WB_HAVELOCK(req)) + nfs_flush_request(req); + last = req; + } + } + if (invalidate) + req->wb_flags |= NFS_WRITE_INVALIDATE; + if ((req = WB_NEXT(req)) == head) + break; + } + + return last; +} + +/* + * Cancel all writeback requests, both pending and in process. + */ +static void +nfs_cancel_dirty(struct inode *inode, pid_t pid) +{ + struct nfs_wreq *head, *req; + + req = head = NFS_WRITEBACK(inode); + while (req != NULL) { + if (req->wb_pid == pid) { + req->wb_flags |= NFS_WRITE_CANCELLED; + rpc_exit(&req->wb_task, 0); + } + if ((req = WB_NEXT(req)) == head) + break; + } +} + +/* + * Flush out all dirty pages belonging to a certain user process and + * maybe wait for the RPC calls to complete. + * + * Another purpose of this function is sync()ing a file range before a + * write lock is released. This is what offset and length are for, even if + * this isn't used by the nlm module yet. + */ +int +nfs_flush_dirty_pages(struct inode *inode, off_t offset, off_t len) +{ + struct nfs_wreq *last = NULL; + + dprintk("NFS: flush_dirty_pages(%x/%ld for pid %d %ld/%ld)\n", + inode->i_dev, inode->i_ino, current->pid, + offset, len); + + if (signalled()) + nfs_cancel_dirty(inode, current->pid); + + while (!signalled()) { + /* Flush all pending writes for this pid and file region */ + last = nfs_flush_pages(inode, current->pid, offset, len, 0); + if (last == NULL) + break; + wait_on_write_request(last); + } + + return signalled()? 
-ERESTARTSYS : 0; +} + +/* + * Flush out any pending write requests and flag that they be discarded + * after the write is complete. + * + * This function is called from nfs_revalidate_inode just before it calls + * invalidate_inode_pages. After nfs_flush_pages returns, we can be sure + * that all dirty pages are locked, so that invalidate_inode_pages does + * not throw away any dirty pages. + */ +void +nfs_invalidate_pages(struct inode *inode) +{ + dprintk("NFS: nfs_invalidate_pages(%x/%ld)\n", + inode->i_dev, inode->i_ino); + + nfs_flush_pages(inode, 0, 0, 0, 1); +} + +/* + * Cancel any pending write requests after a given offset + * (called from nfs_notify_change). + */ +int +nfs_truncate_dirty_pages(struct inode *inode, unsigned long offset) +{ + struct nfs_wreq *req, *head; + unsigned long rqoffset; + + dprintk("NFS: truncate_dirty_pages(%x/%ld, %ld)\n", + inode->i_dev, inode->i_ino, offset); + + req = head = NFS_WRITEBACK(inode); + while (req != NULL) { + rqoffset = req->wb_page->offset + req->wb_offset; + + if (rqoffset >= offset) { + req->wb_flags |= NFS_WRITE_CANCELLED; + rpc_exit(&req->wb_task, 0); + } else if (rqoffset + req->wb_bytes >= offset) { + req->wb_bytes = offset - rqoffset; + } + if ((req = WB_NEXT(req)) == head) + break; + } + + return 0; +} + +/* + * Check if a previous write operation returned an error + */ +int +nfs_check_error(struct inode *inode) +{ + struct nfs_wreq *req; + int status = 0; + + dprintk("nfs: checking for write error inode %04x/%ld\n", + inode->i_dev, inode->i_ino); + + if (!(req = find_failed_request(inode, current->pid))) + return 0; + + dprintk("nfs: write error %d inode %04x/%ld\n", + req->wb_task.tk_status, inode->i_dev, inode->i_ino); + + status = req->wb_task.tk_status; + remove_write_request(&nfs_failed_requests, req); + iput(req->wb_inode); + kfree(req); + return status; +} + +/* + * The following procedures make up the writeback finite state machinery: + * + * 1. 
Try to lock the page if not yet locked by us, + * set up the RPC call info, and pass to the call FSM. + */ +static void +nfs_wback_lock(struct rpc_task *task) +{ + struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata; + struct page *page = req->wb_page; + struct inode *inode = req->wb_inode; + + dprintk("NFS: %4d nfs_wback_lock (status %d flags %x)\n", + task->tk_pid, task->tk_status, req->wb_flags); + + if (!WB_HAVELOCK(req)) + req->wb_flags |= NFS_WRITE_WANTLOCK; + + if (WB_WANTLOCK(req) && set_bit(PG_locked, &page->flags)) { + dprintk("NFS: page already locked in writeback_lock!\n"); + task->tk_timeout = 2 * HZ; + rpc_sleep_on(&write_queue, task, NULL, NULL); + return; + } + task->tk_status = 0; + req->wb_flags &= ~NFS_WRITE_WANTLOCK; + req->wb_flags |= NFS_WRITE_LOCKED; + + if (req->wb_args == 0) { + size_t size = sizeof(struct nfs_writeargs) + + sizeof(struct nfs_fattr); + void *ptr; + + if (!(ptr = kmalloc(size, GFP_KERNEL))) { + task->tk_timeout = HZ; + rpc_sleep_on(&write_queue, task, NULL, NULL); + return; + } + req->wb_args = (struct nfs_writeargs *) ptr; + req->wb_fattr = (struct nfs_fattr *) (req->wb_args + 1); + } + + /* Setup the task struct for a writeback call */ + req->wb_args->fh = NFS_FH(inode); + req->wb_args->offset = page->offset + req->wb_offset; + req->wb_args->count = req->wb_bytes; + req->wb_args->buffer = (void *) (page_address(page) + req->wb_offset); + + rpc_call_setup(task, NFSPROC_WRITE, req->wb_args, req->wb_fattr, 0); + + req->wb_flags |= NFS_WRITE_INPROGRESS; +} + +/* + * 2. Collect the result + */ +static void +nfs_wback_result(struct rpc_task *task) +{ + struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata; + struct inode *inode; + struct page *page; + int status; + + dprintk("NFS: %4d nfs_wback_result (status %d)\n", + task->tk_pid, task->tk_status); + + inode = req->wb_inode; + page = req->wb_page; + status = task->tk_status; + + /* Remove request from writeback list and wake up tasks + * sleeping on it. 
*/ + remove_write_request(&NFS_WRITEBACK(inode), req); + + if (status < 0) { + /* + * An error occurred. Report the error back to the + * application by adding the failed request to the + * inode's error list. + */ + if (find_failed_request(inode, req->wb_pid)) { + status = 0; + } else { + dprintk("NFS: %4d saving write failure code\n", + task->tk_pid); + append_write_request(&nfs_failed_requests, req); + inode->i_count++; + } + clear_bit(PG_uptodate, &page->flags); + } else if (!WB_CANCELLED(req)) { + /* Update attributes as result of writeback. + * Beware: when UDP replies arrive out of order, we + * may end up overwriting a previous, bigger file size. + */ + if (req->wb_fattr->size < inode->i_size) + req->wb_fattr->size = inode->i_size; + /* possible Solaris 2.5 server bug workaround */ + if (inode->i_ino == req->wb_fattr->fileid) + nfs_refresh_inode(inode, req->wb_fattr); + } + + rpc_release_task(task); + + if (WB_INVALIDATE(req)) + clear_bit(PG_uptodate, &page->flags); + if (WB_HAVELOCK(req)) + nfs_unlock_page(page); + + if (req->wb_args) { + kfree(req->wb_args); + req->wb_args = 0; + } + if (status >= 0) + kfree(req); + + free_page(page_address(page)); + iput(inode); + nr_write_requests--; +} diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile new file mode 100644 index 000000000..111adfbf0 --- /dev/null +++ b/fs/nfsd/Makefile @@ -0,0 +1,20 @@ +# +# Makefile for the linux nfs-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +O_TARGET := nfsd.o +O_OBJS := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o nfsxdr.o + +ifdef CONFIG_PROC_FS + O_OBJS += stats.o +endif + +M_OBJS := $(O_TARGET) + +include $(TOPDIR)/Rules.make diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c new file mode 100644 index 000000000..c2346d458 --- /dev/null +++ b/fs/nfsd/auth.c @@ -0,0 +1,47 @@ +/* + * linux/fs/nfsd/auth.c + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/nfsd/nfsd.h> + +void +nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) +{ + struct svc_cred *cred = &rqstp->rq_cred; + int i; + + if (rqstp->rq_userset) + return; + + if (exp->ex_flags & NFSEXP_ALLSQUASH) { + cred->cr_uid = exp->ex_anon_uid; + cred->cr_gid = exp->ex_anon_gid; + cred->cr_groups[0] = NOGROUP; + } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { + if (!cred->cr_uid) + cred->cr_uid = exp->ex_anon_uid; + if (!cred->cr_gid) + cred->cr_gid = exp->ex_anon_gid; + for (i = 0; i < NGROUPS; i++) + if (!cred->cr_groups[i]) + cred->cr_groups[i] = exp->ex_anon_gid; + } + + if (cred->cr_uid != (uid_t) -1) + current->fsuid = cred->cr_uid; + else + current->fsuid = exp->ex_anon_uid; + if (cred->cr_gid != (gid_t) -1) + current->fsgid = cred->cr_gid; + else + current->fsgid = exp->ex_anon_gid; + for (i = 0; i < NGROUPS; i++) + current->groups[i] = cred->cr_groups[i]; + rqstp->rq_userset = 1; +} diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c new file mode 100644 index 000000000..c83150b5f --- /dev/null +++ b/fs/nfsd/export.c @@ -0,0 +1,677 @@ +/* + * linux/fs/nfsd/export.c + * + * NFS exporting and validation. + * + * We maintain a list of clients, each of which has a list of + * exports. 
To export an fs to a given client, you first have + * to create the client entry with NFSCTL_ADDCLIENT, which + * creates a client control block and adds it to the hash + * table. Then, you call NFSCTL_EXPORT for each fs. + * + * You cannot currently read the export information from the + * kernel. It would be nice to have a /proc file though. + * + * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> + */ + +#include <linux/unistd.h> +#include <linux/malloc.h> +#include <linux/stat.h> +#include <linux/in.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/nfsfh.h> +#include <linux/nfsd/syscall.h> +#include <linux/lockd/bind.h> + +typedef struct svc_client svc_client; +typedef struct svc_export svc_export; + +static svc_export * exp_find(svc_client *clp, dev_t dev); +static svc_export * exp_parent(svc_client *clp, dev_t dev); +static void exp_unexport_all(svc_client *clp); +static void exp_do_unexport(svc_export *unexp); +static svc_client * exp_getclientbyname(char *name); +static void exp_freeclient(svc_client *clp); +static void exp_unhashclient(svc_client *clp); +static int exp_verify_string(char *cp, int max); +struct inode * exp_lnamei(char *pathname, int *errp); + +#define NFSDDBG_FACILITY NFSDDBG_EXPORT +#define CLIENT_HASHBITS 6 +#define CLIENT_HASHMAX (1 << CLIENT_HASHBITS) +#define CLIENT_HASHMASK (CLIENT_HASHMAX - 1) +#define CLIENT_HASH(a) \ + ((((a)>>24) ^ ((a)>>16) ^ ((a)>>8) ^(a)) & CLIENT_HASHMASK) +#define EXPORT_HASH(dev) ((dev) & (NFSCLNT_EXPMAX - 1)) + +struct svc_clnthash { + struct svc_clnthash * h_next; + struct in_addr h_addr; + struct svc_client * h_client; +}; +static struct svc_clnthash * clnt_hash[CLIENT_HASHMAX]; +static svc_client * clients = NULL; +static int initialized = 0; + +static int hash_lock = 0; +static int want_lock = 0; +static int hash_count = 0; +static struct wait_queue * hash_wait = NULL; + +#define READLOCK 0 +#define WRITELOCK 1 + +/* + * Find the export entry matching 
xdev/xino. + */ +static inline svc_export * +exp_find(svc_client *clp, dev_t dev) +{ + svc_export * exp; + + exp = clp->cl_export[EXPORT_HASH(dev)]; + while (exp && exp->ex_dev != dev) + exp = exp->ex_next; + return exp; +} + +svc_export * +exp_get(svc_client *clp, dev_t dev, ino_t ino) +{ + svc_export * exp; + + if (!clp) + return NULL; + exp = exp_find(clp, dev); + return (exp && exp->ex_ino == ino)? exp : NULL; +} + +/* + * Find the parent export entry for a given fs. This function is used + * only by the export syscall to keep the export tree consistent. + * nfsd_parentdev(dev) returns the device on which dev is mounted. + */ +static svc_export * +exp_parent(svc_client *clp, dev_t dev) +{ + svc_export *exp; + dev_t xdev = dev; + + do { + exp = exp_find(clp, xdev); + if (exp) + return exp; + } while (nfsd_parentdev(&xdev)); + + return NULL; +} + +/* + * Export a file system. + */ +int +exp_export(struct nfsctl_export *nxp) +{ + svc_client *clp; + svc_export *exp, *parent; + svc_export **head; + struct inode *inode = NULL; + int i, err; + dev_t dev; + ino_t ino; + + /* Consistency check */ + if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || + !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) + return -EINVAL; + + dprintk("exp_export called for %s:%s (%x/%ld fl %x).\n", + nxp->ex_client, nxp->ex_path, + nxp->ex_dev, nxp->ex_ino, nxp->ex_flags); + dev = nxp->ex_dev; + ino = nxp->ex_ino; + + /* Try to lock the export table for update */ + if ((err = exp_writelock()) < 0) + return err; + + /* Look up client info */ + if (!(clp = exp_getclientbyname(nxp->ex_client))) { + err = -EINVAL; + goto finish; + } + + /* + * If there's already an export for this file, assume this + * is just a flag update. + */ + if ((exp = exp_find(clp, dev)) != NULL) { + /* Ensure there's only one export per FS. 
*/ + if (exp->ex_ino != ino) { + err = -EPERM; + } else { + exp->ex_flags = nxp->ex_flags; + exp->ex_anon_uid = nxp->ex_anon_uid; + exp->ex_anon_gid = nxp->ex_anon_gid; + err = 0; + } + goto finish; + } + + /* Look up the inode */ + if (!(inode = nfsd_iget(nxp->ex_dev, nxp->ex_ino))) { + err = -EINVAL; + goto finish; + } + + /* We currently export only dirs. */ + if (!S_ISDIR(inode->i_mode)) { + err = -ENOTDIR; + goto finish; + } + + /* If this is a sub-export, must be root of FS */ + if ((parent = exp_parent(clp, dev)) != NULL) { + struct super_block *sb; + + if ((sb = inode->i_sb) && (inode != sb->s_mounted)) { + err = -EINVAL; + goto finish; + } + } + + if (!(exp = kmalloc(sizeof(*exp), GFP_USER))) { + err = -ENOMEM; + goto finish; + } + dprintk("nfsd: created export entry %p for client %p\n", exp, clp); + + strcpy(exp->ex_path, nxp->ex_path); + exp->ex_client = clp; + exp->ex_parent = parent; + exp->ex_inode = inode; + exp->ex_flags = nxp->ex_flags; + exp->ex_dev = dev; + exp->ex_ino = ino; + exp->ex_anon_uid = nxp->ex_anon_uid; + exp->ex_anon_gid = nxp->ex_anon_gid; + + /* Update parent pointers of all exports */ + if (parent) { + for (i = 0; i < NFSCLNT_EXPMAX; i++) { + svc_export *temp = clp->cl_export[i]; + + while (temp) { + if (temp->ex_parent == parent) + temp->ex_parent = exp; + temp = temp->ex_next; + } + } + } + + head = clp->cl_export + EXPORT_HASH(dev); + exp->ex_next = *head; + *head = exp; + + err = 0; + +finish: + /* Release inode */ + if (err < 0 && inode) + iput(inode); + /* Unlock hashtable */ + exp_unlock(); + return err; +} + +/* + * Unexport a file system. The export entry has already + * been removed from the client's list of exported fs's. + */ +static void +exp_do_unexport(svc_export *unexp) +{ + svc_export *exp; + svc_client *clp; + struct inode *inode; + int i; + + /* Update parent pointers. 
*/ + clp = unexp->ex_client; + for (i = 0; i < NFSCLNT_EXPMAX; i++) { + for (exp = clp->cl_export[i]; exp; exp = exp->ex_next) + if (exp->ex_parent == unexp) + exp->ex_parent = unexp->ex_parent; + } + + inode = unexp->ex_inode; + if (unexp->ex_dev != inode->i_dev || unexp->ex_ino != inode->i_ino) + printk(KERN_WARNING "nfsd: bad inode in unexport!\n"); + else + iput(unexp->ex_inode); + + kfree(unexp); +} + +/* + * Revoke all exports for a given client. + * This may look very awkward, but we have to do it this way in order + * to avoid race conditions (aka mind the parent pointer). + */ +static void +exp_unexport_all(svc_client *clp) +{ + svc_export *exp; + int i; + + dprintk("unexporting all fs's for clnt %p\n", clp); + for (i = 0; i < NFSCLNT_EXPMAX; i++) { + exp = clp->cl_export[i]; + clp->cl_export[i] = NULL; + while (exp) { + svc_export *next = exp->ex_next; + exp_do_unexport(exp); + exp = next; + } + } +} + +/* + * unexport syscall. + */ +int +exp_unexport(struct nfsctl_export *nxp) +{ + svc_client *clp; + svc_export **expp, *exp = NULL; + int err; + + /* Consistency check */ + if (!exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) + return -EINVAL; + + if ((err = exp_writelock()) < 0) + return err; + + err = -EINVAL; + if ((clp = exp_getclientbyname(nxp->ex_client)) != NULL) { + expp = clp->cl_export + EXPORT_HASH(nxp->ex_dev); + while ((exp = *expp) != NULL) { + if (exp->ex_dev == nxp->ex_dev) { + if (exp->ex_ino != nxp->ex_ino) + break; + *expp = exp->ex_next; + exp_do_unexport(exp); + err = 0; + break; + } + expp = &(exp->ex_next); + } + } + + exp_unlock(); + return err; +} + +/* + * Obtain the root fh on behalf of a client. + * This could be done in user space, but I feel that it adds some safety + * since its harder to fool a kernel module than a user space program. 
+ */ +int +exp_rootfh(struct svc_client *clp, dev_t dev, ino_t ino, struct knfs_fh *f) +{ + struct svc_export *exp = NULL; + struct svc_fh fh; + + dprintk("nfsd: exp_rootfh(%s:%x/%ld)\n", clp->cl_ident, dev, ino); + + if (!(exp = exp_get(clp, dev, ino))) + return -EPERM; + exp->ex_inode->i_count++; + fh_compose(&fh, exp, exp->ex_inode); + memcpy(f, &fh.fh_handle, sizeof(struct knfs_fh)); + fh_put(&fh); + + return 0; +} + +/* + * Hashtable locking. Write locks are placed only by user processes + * wanting to modify export information. + */ +void +exp_readlock(void) +{ + while (hash_lock || want_lock) + sleep_on(&hash_wait); + hash_count++; +} + +int +exp_writelock(void) +{ + want_lock++; + while (hash_count || hash_lock) + interruptible_sleep_on(&hash_wait); + want_lock--; + if (current->signal & ~current->blocked) + return -EINTR; + hash_lock = 1; + return 0; +} + +void +exp_unlock(void) +{ + if (hash_count) + hash_count--; + else + hash_lock = 0; + wake_up(&hash_wait); +} + +/* + * Find a valid client given an inet address. We always move the most + * recently used client to the front of the hash chain to speed up + * future lookups. + * Locking against other processes is the responsibility of the caller. + */ +struct svc_client * +exp_getclient(struct sockaddr_in *sin) +{ + struct svc_clnthash **hp, **head, *tmp; + unsigned long addr = sin->sin_addr.s_addr; + + if (!initialized) + return NULL; + + head = &clnt_hash[CLIENT_HASH(addr)]; + + for (hp = head; (tmp = *hp) != NULL; hp = &(tmp->h_next)) { + if (tmp->h_addr.s_addr == addr) { + /* Move client to the front */ + if (head != hp) { + *hp = tmp->h_next; + tmp->h_next = *head; + *head = tmp; + } + + return tmp->h_client; + } + } + + return NULL; +} + +/* + * Find a client given its identifier. 
+ */ +static svc_client * +exp_getclientbyname(char *ident) +{ + svc_client * clp; + + for (clp = clients; clp; clp = clp->cl_next) { + if (!strcmp(clp->cl_ident, ident)) + return clp; + } + return NULL; +} + +/* + * Add or modify a client. + * Change requests may involve the list of host addresses. The list of + * exports and possibly existing uid maps are left untouched. + */ +int +exp_addclient(struct nfsctl_client *ncp) +{ + struct svc_clnthash * ch[NFSCLNT_ADDRMAX]; + svc_client * clp; + int i, err, change = 0, ilen; + + /* First, consistency check. */ + if (!(ilen = exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX))) + return -EINVAL; + if (ncp->cl_naddr > NFSCLNT_ADDRMAX) + return -EINVAL; + + /* Lock the hashtable */ + if ((err = exp_writelock()) < 0) + return err; + + /* First check if this is a change request for a client. */ + for (clp = clients; clp; clp = clp->cl_next) + if (!strcmp(clp->cl_ident, ncp->cl_ident)) + break; + + if (clp) { + change = 1; + } else { + if (!(clp = kmalloc(sizeof(*clp), GFP_KERNEL))) { + exp_unlock(); + return -ENOMEM; + } + memset(clp, 0, sizeof(*clp)); + + dprintk("created client %s (%p)\n", ncp->cl_ident, clp); + + strcpy(clp->cl_ident, ncp->cl_ident); + clp->cl_idlen = ilen; + } + + /* Allocate hash buckets */ + for (i = 0; i < ncp->cl_naddr; i++) { + if (!(ch[i] = kmalloc(GFP_KERNEL, sizeof(ch[0])))) { + while (i--) + kfree(ch[i]); + if (!change) + kfree(clp); + exp_unlock(); + return -ENOMEM; + } + } + + /* Copy addresses. */ + for (i = 0; i < ncp->cl_naddr; i++) { + clp->cl_addr[i] = ncp->cl_addrlist[i]; + } + clp->cl_naddr = ncp->cl_naddr; + + /* Remove old client hash entries. */ + if (change) + exp_unhashclient(clp); + + /* Insert client into hashtable. 
*/ + for (i = 0; i < ncp->cl_naddr; i++) { + struct in_addr addr = clp->cl_addr[i]; + int hash; + + hash = CLIENT_HASH(addr.s_addr); + ch[i]->h_client = clp; + ch[i]->h_addr = addr; + ch[i]->h_next = clnt_hash[hash]; + clnt_hash[hash] = ch[i]; + } + + if (!change) { + clp->cl_next = clients; + clients = clp; + } + + exp_unlock(); + return 0; +} + +/* + * Delete a client given an identifier. + */ +int +exp_delclient(struct nfsctl_client *ncp) +{ + svc_client **clpp, *clp; + int err; + + if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) + return -EINVAL; + + /* Lock the hashtable */ + if ((err = exp_writelock()) < 0) + return err; + + for (clpp = &clients; (clp = *clpp); clpp = &(clp->cl_next)) + if (!strcmp(ncp->cl_ident, clp->cl_ident)) + break; + + if (!clp) { + exp_unlock(); + return -EINVAL; + } + *clpp = clp->cl_next; + exp_freeclient(clp); + + exp_unlock(); + return 0; +} + +/* + * Free a client. The caller has already removed it from the client list. + */ +static void +exp_freeclient(svc_client *clp) +{ + exp_unhashclient(clp); + + /* umap_free(&(clp->cl_umap)); */ + exp_unexport_all(clp); + nfsd_lockd_unexport(clp); + kfree (clp); +} + +/* + * Remove client from hashtable. We first collect all hashtable + * entries and free them in one go. + * The hash table must be writelocked by the caller. 
+ */ +static void +exp_unhashclient(svc_client *clp) +{ + struct svc_clnthash **hpp, *hp, *ch[NFSCLNT_ADDRMAX]; + int i, count, err; + +again: + err = 0; + for (i = 0, count = 0; i < CLIENT_HASHMAX && !err; i++) { + hpp = clnt_hash + i; + while ((hp = *hpp) && !err) { + if (hp->h_client == clp) { + *hpp = hp->h_next; + ch[count++] = hp; + err = (count >= NFSCLNT_ADDRMAX); + } else { + hpp = &(hp->h_next); + } + } + } + if (count != clp->cl_naddr) + printk(KERN_WARNING "nfsd: bad address count in freeclient!\n"); + if (err) + goto again; + for (i = 0; i < count; i++) + kfree (ch[i]); +} + +/* + * Lockd is shutting down and tells us to unregister all clients + */ +void +exp_nlmdetach(void) +{ + struct svc_client *clp; + + for (clp = clients; clp; clp = clp->cl_next) + nfsd_lockd_unexport(clp); +} + +/* + * Verify that string is non-empty and does not exceed max length. + */ +static int +exp_verify_string(char *cp, int max) +{ + int i; + + for (i = 0; i < max; i++) + if (!cp[i]) + return i; + cp[i] = 0; + printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); + return 0; +} + +#if 0 +/* + * Get the inode associated with a pathname. Used by exp_export. + */ +struct inode * +exp_lnamei(char *pathname, int *errp) +{ + struct inode *inode; + unsigned long oldfs; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + *errp = lnamei(pathname, &inode); + set_fs(oldfs); + + return inode; +} +#endif + +/* + * Initialize the exports module. + */ +void +nfsd_export_init(void) +{ + int i; + + dprintk("nfsd: initializing export module.\n"); + if (initialized) + return; + for (i = 0; i < CLIENT_HASHMAX; i++) + clnt_hash[i] = NULL; + clients = NULL; + + initialized = 1; + return; +} + +/* + * Shutdown the exports module. 
+ */ +void +nfsd_export_shutdown(void) +{ + int i; + + dprintk("nfsd: shutting down export module.\n"); + if (!initialized) + return; + if (exp_writelock() < 0) { + printk(KERN_WARNING "Weird: hashtable locked in exp_shutdown"); + return; + } + for (i = 0; i < CLIENT_HASHMAX; i++) { + while (clnt_hash[i]) + exp_freeclient(clnt_hash[i]->h_client); + } + exp_unlock(); + dprintk("nfsd: export shutdown complete.\n"); + + return; +} diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c new file mode 100644 index 000000000..3732ebcce --- /dev/null +++ b/fs/nfsd/lockd.c @@ -0,0 +1,70 @@ +/* + * linux/fs/nfsd/lockd.c + * + * This file contains all the stubs needed when communicating with lockd. + * This level of indirection is necessary so we can run nfsd+lockd without + * requiring the nfs client to be compiled in/loaded, and vice versa. + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/lockd/bind.h> + +#define NFSDDBG_FACILITY NFSDDBG_LOCKD + +static u32 +nlm_fopen(struct svc_rqst *rqstp, struct knfs_fh *f, struct file *filp) +{ + struct svc_fh fh; + u32 nfserr; + + fh.fh_handle = *f; + fh.fh_export = NULL; + fh.fh_inode = NULL; + + nfserr = nfsd_open(rqstp, &fh, S_IFREG, 0, filp); + fh_put(&fh); + return nfserr; +} + +static void +nlm_fclose(struct file *filp) +{ + nfsd_close(filp); +} + +struct nlmsvc_binding nfsd_nlm_ops = { + exp_readlock, /* lock export table for reading */ + exp_unlock, /* unlock export table */ + exp_getclient, /* look up NFS client */ + nlm_fopen, /* open file for locking */ + nlm_fclose, /* close file */ + exp_nlmdetach, /* lockd shutdown notification */ +}; + +/* + * When removing an NFS client entry, notify lockd that it is gone. + * FIXME: We should do the same when unexporting an NFS volume. 
+ */ +void +nfsd_lockd_unexport(struct svc_client *clnt) +{ + nlmsvc_invalidate_client(clnt); +} + +void +nfsd_lockd_init(void) +{ + dprintk("nfsd: initializing lockd\n"); + nlmsvc_ops = &nfsd_nlm_ops; +} + +void +nfsd_lockd_shutdown(void) +{ + nlmsvc_ops = NULL; +} diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c new file mode 100644 index 000000000..fb1c0878b --- /dev/null +++ b/fs/nfsd/nfs3proc.c @@ -0,0 +1,529 @@ +/* + * linux/fs/nfsd/nfs3proc.c + * + * Process version 3 NFS requests. + * + * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/linkage.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <linux/malloc.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr3.h> + +typedef struct svc_rqst svc_rqst; +typedef struct svc_buf svc_buf; + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +#define RETURN(st) { resp->status = (st); return (st); } + +static void +svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr) +{ + *ptr = buf->buf + nr; + *len = buf->buflen - buf->len - nr; +} + +static int +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get a file's attributes + */ +static int +nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd3_attrstat *resp) +{ + int nfserr; + + dprintk("nfsd: GETATTR %x/%ld\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + resp->fh = argp->fh; + nfserr = fh_lookup(rqstp, &resp->fh, 0, MAY_NOP); + RETURN(nfserr); +} + +/* + * Set a file's attributes + */ +static int +nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp, + struct nfsd3_attrstat *resp) +{ + int nfserr; + + dprintk("nfsd: SETATTR %x/%ld\n", + 
SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + resp->fh = argp->fh; + nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs); + RETURN(nfserr); +} + +/* + * Look up a path name component + */ +static int +nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_lookupres *resp) +{ + int nfserr; + + dprintk("nfsd: LOOKUP %x/%ld %s\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->name); + + resp->dirfh = argp->fh; + nfserr = nfsd_lookup(rqstp, &resp->dirfh, + argp->name, + argp->len, + &resp->fh); + RETURN(nfserr); +} + +/* + * Check file access + */ +static int +nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd3_accessres *resp) +{ + /* to be done */ + resp->fh = argp->fh; + return nfserr_notsupp; +} + +/* + * Read a symlink. + */ +static int +nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd3_readlinkres *resp) +{ + u32 *path; + int dummy, nfserr; + + dprintk("nfsd: READLINK %x/%ld\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + /* Reserve room for status, post_op_attr, and path length */ + svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, 1 + 22 + 1); + + /* Read the symlink. */ + resp->len = NFS3_MAXPATHLEN; + nfserr = nfsd_readlink(rqstp, &argp->fh, (char *) path, &resp->len); + RETURN(nfserr); +} + +/* + * Read a portion of a file. + */ +static int +nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, + struct nfsd3_readres *resp) +{ + u32 * buffer; + int nfserr, avail; + + dprintk("nfsd: READ %x/%ld %lu bytes at %lu\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + (unsigned long) argp->count, + (unsigned long) argp->offset); + + /* Obtain buffer pointer for payload. 
+ * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) + * + 1 (xdr opaque byte count) = 26 + */ + svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, 26); + + if ((avail << 2) < argp->count) { + printk(KERN_NOTICE + "oversized read request from %08lx:%d (%d bytes)\n", + ntohl(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port), + argp->count); + argp->count = avail; + } + + resp->count = argp->count; + resp->fh = argp->fh; + nfserr = nfsd_read(rqstp, &resp->fh, + argp->offset, + (char *) buffer, + &resp->count); + + RETURN(nfserr); +} + +/* + * Write data to a file + */ +static int +nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, + struct nfsd3_writeres *resp) +{ + int nfserr; + + dprintk("nfsd: WRITE %x/%ld %d bytes at %ld\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->len, + (unsigned long) argp->offset); + + resp->fh = argp->fh; + nfserr = nfsd_write(rqstp, &resp->fh, + argp->offset, + argp->data, + argp->len, + argp->stable); + resp->committed = argp->stable; + RETURN(nfserr); +} + +/* + * With NFSv3, CREATE processing is a lot easier than with NFSv2. + * At least in theory; we'll see how it fares in practice when the + * first reports about SunOS compatibility problems start to pour in... 
+ */ +static int +nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, + struct nfsd3_createres *resp) +{ + svc_fh *dirfhp, *newfhp = NULL; + struct iattr *attr; + int mode; + u32 nfserr; + + dprintk("nfsd: CREATE %x/%ld %s\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->name); + + dirfhp = fh_copy(&resp->dirfh, &argp->fh); + newfhp = fh_init(&resp->fh); + attr = &argp->attrs; + + /* Get the directory inode */ + nfserr = fh_lookup(rqstp, dirfhp, S_IFDIR, MAY_CREATE); + if (nfserr) + RETURN(nfserr); + + /* Unfudge the mode bits */ + attr->ia_mode &= ~S_IFMT; + if (!(attr->ia_valid & ATTR_MODE)) { + attr->ia_valid |= ATTR_MODE; + attr->ia_mode = S_IFREG; + } + mode = attr->ia_mode & ~S_IFMT; + + /* Now create the file and set attributes */ + nfserr = nfsd_create(rqstp, dirfhp, argp->name, argp->len, + attr, S_IFREG, 0, newfhp, + argp->createmode); + + RETURN(nfserr); +} + +static int +nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_attrstat *resp) +{ + int nfserr; + + dprintk("nfsd: REMOVE %x/%ld %s\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->name); + + fh_copy(&resp->fh, &argp->fh); + + /* Unlink. 
-S_IFDIR means file must not be a directory */ + nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, + argp->name, argp->len); + RETURN(nfserr); +} + +static int +nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp, + void *resp) +{ + int nfserr; + + dprintk("nfsd: RENAME %x/%ld %s -> %x/%ld %s\n", + SVCFH_DEV(&argp->ffh), + SVCFH_INO(&argp->ffh), + argp->fname, + SVCFH_DEV(&argp->tfh), + SVCFH_INO(&argp->tfh), + argp->tname); + + nfserr = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen, + &argp->tfh, argp->tname, argp->tlen); + fh_put(&argp->ffh); + fh_put(&argp->tfh); + RETURN(nfserr); +} + +static int +nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp, + void *resp) +{ + int nfserr; + + dprintk("nfsd: LINK %x/%ld -> %x/%ld %s\n", + SVCFH_DEV(&argp->ffh), + SVCFH_INO(&argp->ffh), + SVCFH_DEV(&argp->tfh), + SVCFH_INO(&argp->tfh), + argp->tname); + + nfserr = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen, + &argp->ffh); + fh_put(&argp->ffh); + fh_put(&argp->tfh); + RETURN(nfserr); +} + +static int +nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, + void *resp) +{ + struct svc_fh newfh; + int nfserr; + + dprintk("nfsd: SYMLINK %x/%ld %s -> %s\n", + SVCFH_DEV(&argp->ffh), + SVCFH_INO(&argp->ffh), + argp->fname, argp->tname); + + memset(&newfh, 0, sizeof(newfh)); + + /* + * Create the link, look up new file and set attrs. + */ + nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, + argp->tname, argp->tlen, + &newfh); + if (nfserr) + nfserr = nfsd_setattr(rqstp, &newfh, &argp->attrs); + + fh_put(&argp->ffh); + fh_put(&newfh); + RETURN(nfserr); +} + +/* + * Make directory. This operation is not idempotent. 
+ */ +static int +nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, + struct nfsd3_diropres *resp) +{ + int nfserr; + + dprintk("nfsd: MKDIR %x/%ld %s\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->name); + + nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, + &argp->attrs, S_IFDIR, 0, &resp->fh); + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * Remove a directory + */ +static int +nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + void *resp) +{ + int nfserr; + + dprintk("nfsd: RMDIR %x/%ld %s\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->name); + + nfserr = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len); + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * Read a portion of a directory. + */ +static int +nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, + struct nfsd3_readdirres *resp) +{ + u32 * buffer; + int nfserr, count; + + dprintk("nfsd: READDIR %x/%ld %d bytes at %d\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->count, argp->cookie); + + /* Reserve buffer space for status */ + svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, 1); + + /* Make sure we've room for the NULL ptr & eof flag, and shrink to + * client read size */ + if ((count -= 8) > argp->count) + count = argp->count; + + /* Read directory and encode entries on the fly */ + nfserr = nfsd_readdir(rqstp, &argp->fh, (loff_t) argp->cookie, + nfssvc_encode_entry, + buffer, &count); + resp->count = count; + + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * Get file system info + */ +static int +nfsd3_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd3_statfsres *resp) +{ + int nfserr; + + dprintk("nfsd: STATFS %x/%ld\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * NFSv2 Server procedures. 
+ * Only the results of non-idempotent operations are cached. + */ +#define nfsd3_proc_none NULL +#define nfssvc_encode_void NULL +#define nfssvc_decode_void NULL +#define nfssvc_release_void NULL +struct nfsd3_void { int dummy; }; + +#define PROC(name, argt, rest, relt, cache) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) nfssvc_decode_##argt, \ + (kxdrproc_t) nfssvc_encode_##rest, \ + (kxdrproc_t) nfssvc_release_##relt, \ + sizeof(struct nfsd3_##argt), \ + sizeof(struct nfsd3_##rest), \ + 0, \ + cache \ + } +struct svc_procedure nfsd3_procedures2[18] = { + PROC(null, void, void, void, RC_NOCACHE), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE), + PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF), + PROC(none, void, void, void, RC_NOCACHE), + PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE), + PROC(readlink, fhandle, readlinkres, void, RC_NOCACHE), + PROC(read, readargs, readres, fhandle, RC_NOCACHE), + PROC(none, void, void, void, RC_NOCACHE), + PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF), + PROC(create, createargs, diropres, fhandle, RC_REPLBUFF), + PROC(remove, diropargs, void, void, RC_REPLSTAT), + PROC(rename, renameargs, void, void, RC_REPLSTAT), + PROC(link, linkargs, void, void, RC_REPLSTAT), + PROC(symlink, symlinkargs, void, void, RC_REPLSTAT), + PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF), + PROC(rmdir, diropargs, void, void, RC_REPLSTAT), + PROC(readdir, readdirargs, readdirres, void, RC_REPLSTAT), + PROC(statfs, fhandle, statfsres, void, RC_NOCACHE), +}; + + +/* + * Map errnos to NFS errnos. 
+ */ +int +nfserrno (int errno) +{ + static struct { + int nfserr; + int syserr; + } nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, EPERM }, + { NFSERR_NOENT, ENOENT }, + { NFSERR_IO, EIO }, + { NFSERR_NXIO, ENXIO }, + { NFSERR_ACCES, EACCES }, + { NFSERR_EXIST, EEXIST }, + { NFSERR_NODEV, ENODEV }, + { NFSERR_NOTDIR, ENOTDIR }, + { NFSERR_ISDIR, EISDIR }, + { NFSERR_INVAL, EINVAL }, + { NFSERR_FBIG, EFBIG }, + { NFSERR_NOSPC, ENOSPC }, + { NFSERR_ROFS, EROFS }, + { NFSERR_NAMETOOLONG, ENAMETOOLONG }, + { NFSERR_NOTEMPTY, ENOTEMPTY }, +#ifdef EDQUOT + { NFSERR_DQUOT, EDQUOT }, +#endif + { NFSERR_STALE, ESTALE }, + { NFSERR_WFLUSH, EIO }, + { -1, EIO } + }; + int i; + + for (i = 0; nfs_errtbl[i].nfserr != -1; i++) { + if (nfs_errtbl[i].syserr == errno) + return htonl (nfs_errtbl[i].nfserr); + } + printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + +#if 0 +static void +nfsd3_dump(char *tag, u32 *buf, int len) +{ + int i; + + printk(KERN_NOTICE + "nfsd: %s (%d words)\n", tag, len); + + for (i = 0; i < len && i < 32; i += 8) + printk(KERN_NOTICE + " %08lx %08lx %08lx %08lx" + " %08lx %08lx %08lx %08lx\n", + buf[i], buf[i+1], buf[i+2], buf[i+3], + buf[i+4], buf[i+5], buf[i+6], buf[i+7]); +} +#endif diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c new file mode 100644 index 000000000..31a5ef887 --- /dev/null +++ b/fs/nfsd/nfs3xdr.c @@ -0,0 +1,785 @@ +/* + * linux/fs/nfsd/nfs3xdr.c + * + * XDR support for nfsd/protocol version 3. 
+ * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/nfs3.h> + +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/xdr3.h> + +#define NFSDDBG_FACILITY NFSDDBG_XDR + +u32 nfs_ok, nfserr_perm, nfserr_noent, nfserr_io, nfserr_nxio, + nfserr_acces, nfserr_exist, nfserr_nodev, nfserr_notdir, + nfserr_isdir, nfserr_fbig, nfserr_nospc, nfserr_rofs, + nfserr_nametoolong, nfserr_dquot, nfserr_stale; + +#ifdef NFSD_OPTIMIZE_SPACE +# define inline +#endif + +/* + * Mapping of S_IF* types to NFS file types + */ +static u32 nfs3_ftypes[] = { + NF3NON, NF3FIFO, NF3CHR, NF3BAD, + NF3DIR, NF3BAD, NF3BLK, NF3BAD, + NF3REG, NF3BAD, NF3LNK, NF3BAD, + NF3SOCK, NF3BAD, NF3LNK, NF3BAD, +}; + +/* + * Initialization of NFS status variables + */ +void +nfs3xdr_init(void) +{ + static int inited = 0; + + if (inited) + return; + + nfs_ok = htonl(NFS_OK); + nfserr_perm = htonl(NFSERR_PERM); + nfserr_noent = htonl(NFSERR_NOENT); + nfserr_io = htonl(NFSERR_IO); + nfserr_nxio = htonl(NFSERR_NXIO); + nfserr_acces = htonl(NFSERR_ACCES); + nfserr_exist = htonl(NFSERR_EXIST); + nfserr_nodev = htonl(NFSERR_NODEV); + nfserr_notdir = htonl(NFSERR_NOTDIR); + nfserr_isdir = htonl(NFSERR_ISDIR); + nfserr_fbig = htonl(NFSERR_FBIG); + nfserr_nospc = htonl(NFSERR_NOSPC); + nfserr_rofs = htonl(NFSERR_ROFS); + nfserr_nametoolong = htonl(NFSERR_NAMETOOLONG); + nfserr_dquot = htonl(NFSERR_DQUOT); + nfserr_stale = htonl(NFSERR_STALE); + + inited = 1; +} + +/* + * XDR functions for basic NFS types + */ +static inline u32 * +enc64(u32 *p, u64 val) +{ + *p++ = (val >> 32); + *p++ = (val & 0xffffffff); + return p; +} + +static inline u32 * +dec64(u32 *p, u64 *valp) +{ + *valp = ((u64) ntohl(*p++)) << 32; + *valp |= ntohl(*p++); + return p; +} + +static inline u32 * +encode_time3(u32 *p, time_t secs) +{ + *p++ = htonl((u32) secs); *p++ = 0; + return p; +} + +static inline 
u32 * +decode_time3(u32 *p, time_t *secp) +{ + *secp = ntohl(*p++); + return p + 1; +} + +static inline u32 * +decode_fh(u32 *p, struct svc_fh *fhp) +{ + if (*p++ != sizeof(struct knfs_fh)) + return NULL; + + memcpy(&fhp->fh_handle, p, sizeof(struct knfs_fh)); + fhp->fh_inode = NULL; + fhp->fh_export = NULL; + + return p + (sizeof(struct knfs_fh) >> 2); +} + +static inline u32 * +encode_fh(u32 *p, struct svc_fh *fhp) +{ + *p++ = htonl(sizeof(struct knfs_fh)); + memcpy(p, &fhp->fh_handle, sizeof(struct knfs_fh)); + return p + (sizeof(struct knfs_fh) >> 2); +} + +/* + * Decode a file name and make sure that the path contains + * no slashes or null bytes. + */ +static inline u32 * +decode_filename(u32 *p, char **namp, int *lenp) +{ + char *name; + int i; + + if ((p = xdr_decode_string(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0' || *name == '/') + return NULL; + } + *name = '\0'; + } + + return p; +} + +static inline u32 * +decode_pathname(u32 *p, char **namp, int *lenp) +{ + char *name; + int i; + + if ((p = xdr_decode_string(p, namp, lenp, NFS3_MAXPATHLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0') + return NULL; + } + *name = '\0'; + } + + return p; +} + +static inline u32 * +decode_sattr3(u32 *p, struct iattr *iap) +{ + u32 tmp; + + iap->ia_valid = 0; + + if (*p++) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_UID; + iap->ia_uid = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_GID; + iap->ia_gid = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_SIZE; + iap->ia_size = ntohl(*p++); + } + if ((tmp = *p++) == 1) { + iap->ia_valid |= ATTR_ATIME; + } else if (tmp == 2) { + iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + iap->ia_atime = ntohl(*p++), p++; + } + if ((tmp = *p++) != 0) { + iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; + } else if (tmp == 2) { + iap->ia_valid |= 
ATTR_MTIME; + iap->ia_mtime = ntohl(*p++), p++; + } + return p; +} + +static inline u32 * +encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct inode *inode) +{ + if (!inode) { + printk("nfsd: NULL inode in %s:%d", __FILE__, __LINE__); + return NULL; + } + + *p++ = htonl(nfs3_ftypes[(inode->i_mode & S_IFMT) >> 12]); + *p++ = htonl((u32) inode->i_mode); + *p++ = htonl((u32) inode->i_nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, inode->i_uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, inode->i_gid)); + if (S_ISLNK(inode->i_mode) && inode->i_size > NFS3_MAXPATHLEN) { + p = enc64(p, (u64) NFS3_MAXPATHLEN); + } else { + p = enc64(p, (u64) inode->i_size); + } + p = enc64(p, inode->i_blksize * inode->i_blocks); + *p++ = htonl((u32) MAJOR(inode->i_rdev)); + *p++ = htonl((u32) MINOR(inode->i_rdev)); + p = enc64(p, (u64) inode->i_dev); + p = enc64(p, (u64) inode->i_ino); + p = encode_time3(p, inode->i_atime); + p = encode_time3(p, inode->i_mtime); + p = encode_time3(p, inode->i_ctime); + + return p; +} + +/* + * Encode post-operation attributes. + * The inode may be NULL if the call failed because of a stale file + * handle. In this case, no attributes are returned. 
+ */ +static u32 * +encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct inode *inode) +{ + if (inode == NULL) { + *p++ = xdr_zero; + return p; + } + return encode_fattr3(rqstp, p, inode); +} + +/* + * Enocde weak cache consistency data + */ +static u32 * +encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + struct inode *inode = fhp->fh_inode; + + if (fhp->fh_post_version == inode->i_version) { + *p++ = xdr_one; + p = enc64(p, (u64) fhp->fh_pre_size); + p = encode_time3(p, fhp->fh_pre_mtime); + p = encode_time3(p, fhp->fh_pre_ctime); + } else { + *p++ = xdr_zero; + } + return encode_post_op_attr(rqstp, p, inode); +} + +/* + * Check buffer bounds after decoding arguments + */ +static inline int +xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) +{ + struct svc_buf *buf = &rqstp->rq_argbuf; + + return p - buf->base <= buf->buflen; +} + +static inline int +xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) +{ + struct svc_buf *buf = &rqstp->rq_resbuf; + + buf->len = p - buf->base; + dprintk("nfsd: ressize_check p %p base %p len %d\n", + p, buf->base, buf->buflen); + return (buf->len <= buf->buflen); +} + +/* + * XDR decode functions + */ +int +nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + if (!(p = decode_fh(p, fhp))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_sattrargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_sattr3(p, &args->attrs)) + || (*p++ && !(p = decode_time3(p, &args->guardtime)))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_diropargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessargs *args) +{ + if 
(!(p = decode_fh(p, &args->fh))) + return 0; + args->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = dec64(p, &args->offset)) + || !(p = dec64(p, &args->count))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_writeargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = dec64(p, &args->offset)) + || !(p = dec64(p, &args->count))) + return 0; + + args->stable = ntohl(*p++); + args->len = ntohl(*p++); + args->data = (char *) p; + p += (args->len + 3) >> 2; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + switch (args->createmode = ntohl(*p++)) { + case 0: case 1: + if (!(p = decode_sattr3(p, &args->attrs))) + return 0; + break; + case 2: + args->verf = p; + p += 2; + break; + default: + return 0; + } + + return xdr_argsize_check(rqstp, p); +} +int +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len)) + || !(p = decode_sattr3(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_symlinkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_sattr3(p, &args->attrs)) + || !(p = decode_pathname(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_mknodargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) 
+ || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + args->ftype = ntohl(*p++); + + if (args->ftype == NF3BLK || args->ftype == NF3CHR + || args->ftype == NF3SOCK || args->ftype == NF3FIFO) { + if (!(p = decode_sattr3(p, &args->attrs))) + return 0; + } + + if (args->ftype == NF3BLK || args->ftype == NF3CHR) { + args->major = ntohl(*p++); + args->minor = ntohl(*p++); + } + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_renameargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_linkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readdirargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + args->cookie = ntohl(*p++); + args->verf = p; p += 2; + args->count = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readdirargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + args->cookie = ntohl(*p++); + args->verf = p; p += 2; + args->dircount = ntohl(*p++); + args->count = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_commitargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = dec64(p, &args->offset))) + return 0; + args->count = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ +/* GETATTR */ +int 
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + if (!(p = encode_fattr3(rqstp, p, resp->fh.fh_inode))) + return 0; + return xdr_ressize_check(rqstp, p); +} + +/* SETATTR, REMOVE, RMDIR */ +int +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + if (!(p = encode_wcc_data(rqstp, p, &resp->fh))) + return 0; + return xdr_ressize_check(rqstp, p); +} + +/* LOOKUP */ +int +nfs3svc_encode_lookupres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_lookupres *resp) +{ + if (resp->status == 0) { + p = encode_fh(p, &resp->fh); + if (!(p = encode_fattr3(rqstp, p, resp->fh.fh_inode))) + return 0; + } + p = encode_post_op_attr(rqstp, p, resp->dirfh.fh_inode); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +int +nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessres *resp) +{ + p = encode_post_op_attr(rqstp, p, resp->fh.fh_inode); + if (resp->status == 0) + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* READLINK */ +int +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readlinkres *resp) +{ + p = encode_post_op_attr(rqstp, p, resp->fh.fh_inode); + if (resp->status == 0) { + *p++ = htonl(resp->len); + p += XDR_QUADLEN(resp->len); + } + return xdr_ressize_check(rqstp, p); +} + +/* READ */ +int +nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readres *resp) +{ + p = encode_post_op_attr(rqstp, p, resp->fh.fh_inode); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->eof); + *p++ = htonl(resp->count); /* xdr opaque count */ + p += XDR_QUADLEN(resp->count); + } + return xdr_ressize_check(rqstp, p); +} + +/* WRITE */ +int +nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_writeres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->committed); + *p++ = 
htonl(nfssvc_boot.tv_sec); + *p++ = htonl(nfssvc_boot.tv_usec); + } + return xdr_ressize_check(rqstp, p); +} + +/* CREATE, MKDIR, SYMLINK, MKNOD */ +int +nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_createres *resp) +{ + if (resp->status == 0) { + p = encode_fh(p, &resp->fh); + p = encode_post_op_attr(rqstp, p, resp->fh.fh_inode); + } + p = encode_wcc_data(rqstp, p, &resp->dirfh); + return xdr_ressize_check(rqstp, p); +} + +/* RENAME */ +int +nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_renameres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->ffh); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return xdr_ressize_check(rqstp, p); +} + +/* LINK */ +int +nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_linkres *resp) +{ + p = encode_post_op_attr(rqstp, p, resp->fh.fh_inode); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return xdr_ressize_check(rqstp, p); +} + +/* READDIR */ +int +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readdirres *resp) +{ + p = encode_post_op_attr(rqstp, p, resp->fh.fh_inode); + if (resp->status == 0) { + /* stupid readdir cookie */ + *p++ = ntohl(resp->fh.fh_inode->i_mtime); + *p++ = xdr_zero; + p = resp->list_end; + } + + return xdr_ressize_check(rqstp, p); +} + +#define NFS3_ENTRYPLUS_BAGGAGE ((1 + 20 + 1 + NFS3_FHSIZE) << 2) +int +nfs3svc_encode_entry(struct readdir_cd *cd, const char *name, + int namlen, unsigned long offset, ino_t ino) +{ + u32 *p = cd->buffer; + int buflen, slen, elen; + struct svc_fh fh; + + if (offset > ~((u64) 0)) + return -EINVAL; + if (cd->offset) + *cd->offset = htonl(offset); + + /* For readdirplus, look up the inode */ + if (cd->plus && nfsd_lookup(cd->rqstp, cd->dirfh, name, namlen, &fh)) + return 0; + + /* truncate filename if too long */ + if (namlen > NFS3_MAXNAMLEN) + namlen = NFS3_MAXNAMLEN; + + slen = XDR_QUADLEN(namlen); + elen = slen + (cd->plus? 
NFS3_ENTRYPLUS_BAGGAGE : 0); + if ((buflen = cd->buflen - elen - 4) < 0) { + cd->eob = 1; + if (cd->plus) + fh_put(&fh); + return -EINVAL; + } + *p++ = xdr_one; /* mark entry present */ + *p++ = xdr_zero; /* file id (64 bit) */ + *p++ = htonl((u32) ino); + *p++ = htonl((u32) namlen); /* name length & name */ + memcpy(p, name, namlen); + p += slen; + + /* throw in readdirplus baggage */ + if (cd->plus) { + p = encode_post_op_attr(cd->rqstp, p, fh.fh_inode); + p = encode_fh(p, &fh); + fh_put(&fh); + } + + cd->offset = p; /* remember pointer */ + p = enc64(p, ~(u64) 0); /* offset of next entry */ + + cd->buflen = buflen; + cd->buffer = p; + return 0; +} + +/* FSSTAT */ +int +nfs3svc_encode_statfsres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_statfsres *resp) +{ + struct statfs *s = &resp->stats; + u64 bs = s->f_bsize; + + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + p = enc64(p, bs * s->f_blocks); /* total bytes */ + p = enc64(p, bs * s->f_bfree); /* free bytes */ + p = enc64(p, bs * s->f_bavail); /* user available bytes */ + p = enc64(p, s->f_files); /* total inodes */ + p = enc64(p, s->f_ffree); /* free inodes */ + p = enc64(p, s->f_ffree); /* user available inodes */ + *p++ = htonl(resp->invarsec); /* mean unchanged time */ + } + return xdr_ressize_check(rqstp, p); +} + +/* FSINFO */ +int +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_fsinfores *resp) +{ + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->f_rtmax); + *p++ = htonl(resp->f_rtpref); + *p++ = htonl(resp->f_rtmult); + *p++ = htonl(resp->f_wtmax); + *p++ = htonl(resp->f_wtpref); + *p++ = htonl(resp->f_wtmult); + *p++ = htonl(resp->f_dtpref); + *p++ = htonl(resp->f_maxfilesize); + *p++ = xdr_zero; + *p++ = htonl(1000000000 / HZ); + *p++ = htonl(resp->f_properties); + } + + return xdr_ressize_check(rqstp, p); +} + +/* PATHCONF */ +int +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p, + struct 
nfsd3_pathconfres *resp) +{ + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->p_link_max); + *p++ = htonl(resp->p_name_max); + *p++ = xdr_one; /* always reject long file names */ + *p++ = xdr_one; /* chown restricted */ + *p++ = htonl(resp->p_case_insensitive); + *p++ = htonl(resp->p_case_preserving); + } + + return xdr_ressize_check(rqstp, p); +} + +/* COMMIT */ +int +nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_commitres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + /* Write verifier */ + if (resp->status == 0) { + *p++ = htonl(nfssvc_boot.tv_sec); + *p++ = htonl(nfssvc_boot.tv_usec); + } + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +int +nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *resp) +{ + fh_put(&resp->fh); + return 1; +} + +int +nfs3svc_release_fhandle2(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_fhandle2 *resp) +{ + fh_put(&resp->fh1); + fh_put(&resp->fh2); + return 1; +} diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c new file mode 100644 index 000000000..e67147232 --- /dev/null +++ b/fs/nfsd/nfscache.c @@ -0,0 +1,321 @@ +/* + * linux/fs/nfsd/nfscache.c + * + * Request reply cache. This is currently a global cache, but this may + * change in the future and be a per-client cache. + * + * This code is heavily inspired by the 44BSD implementation, although + * it does things a bit differently. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/malloc.h> +#include <linux/string.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> + +/* Size of reply cache. 
Common values are: + * 4.3BSD: 128 + * 4.4BSD: 256 + * Solaris2: 1024 + * DEC Unix: 512-4096 + */ +#define CACHESIZE 1024 +#define HASHSIZE 64 +#define REQHASH(xid) ((((xid) >> 24) ^ (xid)) & (HASHSIZE-1)) + +struct nfscache_head { + struct svc_cacherep * next; + struct svc_cacherep * prev; +}; + +static struct nfscache_head hash_list[HASHSIZE]; +static struct svc_cacherep * lru_head; +static struct svc_cacherep * lru_tail; +static struct svc_cacherep nfscache[CACHESIZE]; +static int cache_initialized = 0; +static int cache_disabled = 1; + +static int nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data); + +void +nfsd_cache_init(void) +{ + struct svc_cacherep *rp; + struct nfscache_head *rh; + int i; + + if (cache_initialized) + return; + + for (i = 0, rh = hash_list; i < HASHSIZE; i++, rh++) + rh->next = rh->prev = (struct svc_cacherep *) rh; + + for (i = 0, rp = nfscache; i < CACHESIZE; i++, rp++) { + rp->c_state = RC_UNUSED; + rp->c_type = RC_NOCACHE; + rp->c_hash_next = + rp->c_hash_prev = rp; + rp->c_lru_next = rp + 1; + rp->c_lru_prev = rp - 1; + } + lru_head = nfscache; + lru_tail = nfscache + CACHESIZE - 1; + lru_head->c_lru_prev = NULL; + lru_tail->c_lru_next = NULL; + + cache_initialized = 1; + cache_disabled = 0; +} + +void +nfsd_cache_shutdown(void) +{ + struct svc_cacherep *rp; + + if (!cache_initialized) + return; + + for (rp = lru_head; rp; rp = rp->c_lru_next) { + if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) + kfree(rp->c_replbuf.buf); + } + + cache_initialized = 0; + cache_disabled = 1; +} + +/* + * Move cache entry to front of LRU list + */ +static void +lru_put_front(struct svc_cacherep *rp) +{ + struct svc_cacherep *prev = rp->c_lru_prev, + *next = rp->c_lru_next; + + if (prev) + prev->c_lru_next = next; + else + lru_head = next; + if (next) + next->c_lru_prev = prev; + else + lru_tail = prev; + + rp->c_lru_next = lru_head; + rp->c_lru_prev = NULL; + if (lru_head) + lru_head->c_lru_prev = rp; + lru_head = rp; +} + +/* + * 
Move a cache entry from one hash list to another + */ +static void +hash_refile(struct svc_cacherep *rp) +{ + struct svc_cacherep *prev = rp->c_hash_prev, + *next = rp->c_hash_next; + struct nfscache_head *head = hash_list + REQHASH(rp->c_xid); + + prev->c_hash_next = next; + next->c_hash_prev = prev; + + rp->c_hash_next = head->next; + rp->c_hash_prev = (struct svc_cacherep *) head; + head->next->c_hash_prev = rp; + head->next = rp; +} + +/* + * Try to find an entry matching the current call in the cache. When none + * is found, we grab the oldest unlocked entry off the LRU list. + * Note that no operation within the loop may sleep. + */ +int +nfsd_cache_lookup(struct svc_rqst *rqstp, int type) +{ + struct svc_cacherep *rh, *rp; + struct svc_client *clp = rqstp->rq_client; + u32 xid = rqstp->rq_xid, + proc = rqstp->rq_proc; + unsigned long age; + + rqstp->rq_cacherep = NULL; + if (cache_disabled || type == RC_NOCACHE) { + nfsdstats.rcnocache++; + return RC_DOIT; + } + + rp = rh = (struct svc_cacherep *) &hash_list[REQHASH(xid)]; + while ((rp = rp->c_hash_next) != rh) { + if (rp->c_state != RC_UNUSED && + xid == rp->c_xid && proc == rp->c_proc && + exp_checkaddr(clp, rp->c_client)) { + nfsdstats.rchits++; + goto found_entry; + } + } + nfsdstats.rcmisses++; + + /* This loop shouldn't take more than a few iterations normally */ + { + int safe = 0; + for (rp = lru_tail; rp; rp = rp->c_lru_prev) { + if (rp->c_state != RC_INPROG) + break; + if (safe++ > CACHESIZE) { + printk("nfsd: loop in repcache LRU list\n"); + cache_disabled = 1; + return RC_DOIT; + } + } + } + + /* This should not happen */ + if (rp == NULL) { + static int complaints = 0; + + printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); + if (++complaints > 5) { + printk(KERN_WARNING "nfsd: disabling repcache.\n"); + cache_disabled = 1; + } + return RC_DOIT; + } + + rqstp->rq_cacherep = rp; + rp->c_state = RC_INPROG; + rp->c_xid = xid; + rp->c_proc = proc; + rp->c_client = rqstp->rq_addr.sin_addr; + 
hash_refile(rp); + + /* release any buffer */ + if (rp->c_type == RC_REPLBUFF) { + kfree(rp->c_replbuf.buf); + rp->c_replbuf.buf = NULL; + } + rp->c_type = RC_NOCACHE; + + return RC_DOIT; + +found_entry: + /* We found a matching entry which is either in progress or done. */ + age = jiffies - rp->c_timestamp; + rp->c_timestamp = jiffies; + lru_put_front(rp); + + /* Request being processed or excessive rexmits */ + if (rp->c_state == RC_INPROG || age < RC_DELAY) + return RC_DROPIT; + + /* From the hall of fame of impractical attacks: + * Is this a user who tries to snoop on the cache? */ + if (!rqstp->rq_secure && rp->c_secure) + return RC_DOIT; + + /* Compose RPC reply header */ + switch (rp->c_type) { + case RC_NOCACHE: + return RC_DOIT; + case RC_REPLSTAT: + svc_putlong(&rqstp->rq_resbuf, rp->c_replstat); + break; + case RC_REPLBUFF: + if (!nfsd_cache_append(rqstp, &rp->c_replbuf)) + return RC_DOIT; /* should not happen */ + break; + default: + printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); + rp->c_state = RC_UNUSED; + return RC_DOIT; + } + + return RC_REPLY; +} + +/* + * Update a cache entry. This is called from nfsd_dispatch when + * the procedure has been executed and the complete reply is in + * rqstp->rq_res. + * + * We're copying around data here rather than swapping buffers because + * the toplevel loop requires max-sized buffers, which would be a waste + * of memory for a cache with a max reply size of 100 bytes (diropokres). + * + * If we should start to use different types of cache entries tailored + * specifically for attrstat and fh's, we may save even more space. + * + * Also note that a cachetype of RC_NOCACHE can legally be passed when + * nfsd failed to encode a reply that otherwise would have been cached. + * In this case, nfsd_cache_update is called with statp == NULL. 
+ */ +void +nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) +{ + struct svc_cacherep *rp; + struct svc_buf *resp = &rqstp->rq_resbuf, *cachp; + int len; + + if (!(rp = rqstp->rq_cacherep) || cache_disabled) + return; + + /* Don't cache excessive amounts of data and XDR failures */ + if (!statp || (len = resp->buf - statp) > (256 >> 2)) { + rp->c_state = RC_UNUSED; + return; + } + + switch (cachetype) { + case RC_REPLSTAT: + if (len != 1) + printk("nfsd: RC_REPLSTAT/reply len %d!\n",len); + rp->c_replstat = *statp; + break; + case RC_REPLBUFF: + cachp = &rp->c_replbuf; + cachp->buf = (u32 *) kmalloc(len << 2, GFP_KERNEL); + if (!cachp->buf) { + rp->c_state = RC_UNUSED; + return; + } + cachp->len = len; + memcpy(cachp->buf, statp, len << 2); + break; + } + + lru_put_front(rp); + rp->c_secure = rqstp->rq_secure; + rp->c_type = cachetype; + rp->c_state = RC_DONE; + rp->c_timestamp = jiffies; + + return; +} + +/* + * Copy cached reply to current reply buffer. Should always fit. + */ +static int +nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data) +{ + struct svc_buf *resp = &rqstp->rq_resbuf; + + if (resp->len + data->len > resp->buflen) { + printk(KERN_WARNING "nfsd: cached reply too large (%d).\n", + data->len); + return 0; + } + memcpy(resp->buf, data->buf, data->len); + resp->buf += ((data->len + 3) >> 2); + resp->len += data->len; + return 1; +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c new file mode 100644 index 000000000..c466321ed --- /dev/null +++ b/fs/nfsd/nfsctl.c @@ -0,0 +1,251 @@ +/* + * linux/fs/nfsd/nfsctl.c + * + * Syscall interface to knfsd. 
+ * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/version.h> + +#include <linux/linkage.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/nfs.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <linux/malloc.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/syscall.h> + +#if LINUX_VERSION_CODE >= 0x020100 +#include <asm/uaccess.h> +#else +# define copy_from_user memcpy_fromfs +# define copy_to_user memcpy_tofs +# define access_ok !verify_area +#endif +#include <asm/smp.h> +#include <asm/smp_lock.h> + +extern long sys_call_table[]; + +static int nfsctl_svc(struct nfsctl_svc *data); +static int nfsctl_addclient(struct nfsctl_client *data); +static int nfsctl_delclient(struct nfsctl_client *data); +static int nfsctl_export(struct nfsctl_export *data); +static int nfsctl_unexport(struct nfsctl_export *data); +static int nfsctl_getfh(struct nfsctl_fhparm *, struct knfs_fh *); +/* static int nfsctl_ugidupdate(struct nfsctl_ugidmap *data); */ + +static int initialized = 0; + +/* + * Initialize nfsd + */ +static void +nfsd_init(void) +{ + nfsd_xdr_init(); /* XDR */ +#ifdef CONFIG_PROC_FS + nfsd_stat_init(); /* Statistics */ +#endif + nfsd_cache_init(); /* RPC reply cache */ + nfsd_export_init(); /* Exports table */ + nfsd_lockd_init(); /* lockd->nfsd callbacks */ + nfsd_racache_init(); /* Readahead param cache */ + initialized = 1; +} + +static inline int +nfsctl_svc(struct nfsctl_svc *data) +{ + return nfsd_svc(data->svc_port, data->svc_nthreads); +} + +static inline int +nfsctl_addclient(struct nfsctl_client *data) +{ + return exp_addclient(data); +} + +static inline int +nfsctl_delclient(struct nfsctl_client *data) +{ + return exp_delclient(data); +} + 
+static inline int +nfsctl_export(struct nfsctl_export *data) +{ + return exp_export(data); +} + +static inline int +nfsctl_unexport(struct nfsctl_export *data) +{ + return exp_unexport(data); +} + +#ifdef notyet +static inline int +nfsctl_ugidupdate(nfs_ugidmap *data) +{ + return -EINVAL; +} +#endif + +static inline int +nfsctl_getfh(struct nfsctl_fhparm *data, struct knfs_fh *res) +{ + struct sockaddr_in *sin; + struct svc_client *clp; + int err = 0; + + if (data->gf_addr.sa_family != AF_INET) + return -EPROTONOSUPPORT; + if (data->gf_version < 2 || data->gf_version > NFSSVC_MAXVERS) + return -EINVAL; + sin = (struct sockaddr_in *)&data->gf_addr; + + exp_readlock(); + if (!(clp = exp_getclient(sin))) + err = -EPERM; + else + err = exp_rootfh(clp, data->gf_dev, data->gf_ino, res); + exp_unlock(); + + return err; +} + +#ifdef CONFIG_NFSD +#define handle_sys_nfsservctl sys_nfsservctl +#endif + +int +asmlinkage handle_sys_nfsservctl(int cmd, struct nfsctl_arg *argp, + union nfsctl_res *resp) +{ + struct nfsctl_arg * arg = NULL; + union nfsctl_res * res = NULL; + int err; + + lock_kernel (); + if (!initialized) + nfsd_init(); + if (!suser()) { + err = -EPERM; + goto done; + } + if (!access_ok(VERIFY_READ, argp, sizeof(*argp)) + || (resp && !access_ok(VERIFY_WRITE, resp, sizeof(*resp)))) { + err = -EFAULT; + goto done; + } + if (!(arg = kmalloc(sizeof(*arg), GFP_USER)) || + (resp && !(res = kmalloc(sizeof(*res), GFP_USER)))) { + err = -ENOMEM; /* ??? 
*/ + goto done; + } + copy_from_user(arg, argp, sizeof(*argp)); + if (arg->ca_version != NFSCTL_VERSION) { + printk(KERN_WARNING "nfsd: incompatible version in syscall.\n"); + err = -EINVAL; + goto done; + } + + MOD_INC_USE_COUNT; + switch(cmd) { + case NFSCTL_SVC: + err = nfsctl_svc(&arg->ca_svc); + break; + case NFSCTL_ADDCLIENT: + err = nfsctl_addclient(&arg->ca_client); + break; + case NFSCTL_DELCLIENT: + err = nfsctl_delclient(&arg->ca_client); + break; + case NFSCTL_EXPORT: + err = nfsctl_export(&arg->ca_export); + break; + case NFSCTL_UNEXPORT: + err = nfsctl_unexport(&arg->ca_export); + break; +#ifdef notyet + case NFSCTL_UGIDUPDATE: + err = nfsctl_ugidupdate(&arg->ca_umap); + break; +#endif + case NFSCTL_GETFH: + err = nfsctl_getfh(&arg->ca_getfh, &res->cr_getfh); + break; + default: + err = -EINVAL; + } + MOD_DEC_USE_COUNT; + + if (!err && resp) + copy_to_user(resp, res, sizeof(*resp)); + +done: + if (arg) + kfree(arg); + if (res) + kfree(res); + + unlock_kernel (); + return err; +} + +#ifdef MODULE +/* New-style module support since 2.1.18 */ +#if LINUX_VERSION_CODE >= 131346 +EXPORT_NO_SYMBOLS; +MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +#endif + +static unsigned long old_syscallvec; + +extern int (*do_nfsservctl)(int, void *, void *); + +/* + * Initialize the module + */ +int +init_module(void) +{ + printk("Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); + nfsd_init(); + do_nfsservctl = handle_sys_nfsservctl; + return 0; +} + +/* + * Clean up the mess before unloading the module + */ +void +cleanup_module(void) +{ + if (MOD_IN_USE) { + printk("nfsd: nfsd busy, remove delayed\n"); + return; + } + do_nfsservctl = NULL; + nfsd_export_shutdown(); + nfsd_cache_shutdown(); +#ifdef CONFIG_PROC_FS + nfsd_stat_shutdown(); +#endif + nfsd_lockd_shutdown(); +} +#endif diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c new file mode 100644 index 000000000..526a4455b --- /dev/null +++ b/fs/nfsd/nfsfh.c @@ -0,0 +1,124 @@ +/* + * linux/fs/nfsd/nfsfh.c 
+ * + * NFS server filehandle treatment. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/unistd.h> +#include <linux/string.h> +#include <linux/stat.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> + +#define NFSDDBG_FACILITY NFSDDBG_FH + +/* + * Get the inode version number + */ +static inline int +nfsd_iversion(struct inode *inode) +{ + if (inode->i_sb->s_magic == EXT2_SUPER_MAGIC) + return inode->u.ext2_i.i_version; + return 0; +} + +/* + * Get the inode given a file handle. + */ +u32 +fh_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) +{ + struct svc_export *exp; + struct inode *inode; + struct knfs_fh *fh = &fhp->fh_handle; + + /* Already checked */ + if (fhp->fh_inode) + return 0; + + dprintk("nfsd: fh_lookup(exp %x/%ld fh %x/%ld)\n", + fh->fh_xdev, fh->fh_xino, fh->fh_dev, fh->fh_ino); + + /* Make sure that clients don't cheat */ + if (fh->fh_dev != fh->fh_xdev) { + printk(KERN_NOTICE "nfsd: fh with bad dev fields " + "(%x != %x) from %08lx:%d\n", + fh->fh_dev, fh->fh_xdev, + ntohl(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port)); + return nfserr_perm; + } + + /* Look up the export entry */ + exp = exp_get(rqstp->rq_client, fh->fh_xdev, fh->fh_xino); + if (!exp) { + /* nfsdstats.fhstale++; */ + return nfserr_stale; /* export entry revoked */ + } + + /* Check if the request originated from a secure port. 
*/ + if (!rqstp->rq_secure && EX_SECURE(exp)) { + printk(KERN_WARNING + "nfsd: request from insecure port (%08lx:%d)!\n", + ntohl(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port)); + return nfserr_perm; + } + + /* Set user creds if we haven't done so already */ + nfsd_setuser(rqstp, exp); + + /* Get the inode */ + if (!(inode = nfsd_iget(fh->fh_dev, fh->fh_ino)) + || !inode->i_nlink || fh->fh_version != nfsd_iversion(inode)) { + if (inode) + iput(inode); + /* nfsdstats.fhstale++; */ + return nfserr_stale; /* unlinked in the meanwhile */ + } + + /* This is basically what wait_on_inode does */ + while (inode->i_lock) + sleep_on(&inode->i_wait); + fhp->fh_inode = inode; + fhp->fh_export = exp; + + /* Type check. The correct error return for type mismatches + * does not seem to be generally agreed upon. SunOS seems to + * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 + * spec says this is incorrect (implementation notes for the + * write call). + */ + if (type > 0 && (inode->i_mode & S_IFMT) != type) + return (type == S_IFDIR)? nfserr_notdir : nfserr_isdir; + if (type < 0 && (inode->i_mode & S_IFMT) == -type) + return (type == -S_IFDIR)? nfserr_notdir : nfserr_isdir; + + /* Finally, check access permissions */ + return nfsd_permission(fhp->fh_export, inode, access); +} + +/* + * Compose file handle for NFS reply. 
+ */ +void +fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct inode *inode) +{ + dprintk("nfsd: fh_compose(exp %x/%ld fh %x/%ld)\n", + exp->ex_dev, exp->ex_ino, inode->i_dev, inode->i_ino); + + fh_init(fhp); /* initialize empty fh */ + fhp->fh_inode = inode; + fhp->fh_export = exp; + fhp->fh_handle.fh_dev = inode->i_dev; + fhp->fh_handle.fh_ino = inode->i_ino; + fhp->fh_handle.fh_xdev = exp->ex_dev; + fhp->fh_handle.fh_xino = exp->ex_ino; + fhp->fh_handle.fh_version = nfsd_iversion(inode); +} diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c new file mode 100644 index 000000000..05b13deab --- /dev/null +++ b/fs/nfsd/nfsproc.c @@ -0,0 +1,581 @@ +/* + * nfsproc2.c Process version 2 NFS requests. + * + * Copyright (C) 1995 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/linkage.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <linux/malloc.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> + +typedef struct svc_rqst svc_rqst; +typedef struct svc_buf svc_buf; + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +#define sleep(msec) \ + { printk(KERN_NOTICE "nfsd: sleeping %d msecs\n", msec); \ + current->state = TASK_INTERRUPTIBLE; \ + current->timeout = jiffies + msec / 10; \ + schedule(); \ + } +#define RETURN(st) return st + +static void +svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr) +{ + *ptr = buf->buf + nr; + *len = buf->buflen - buf->len - nr; +} + +static int +nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + RETURN(nfs_ok); +} + +/* + * Get a file's attributes + */ +static int +nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd_attrstat *resp) +{ + dprintk("nfsd: GETATTR %x/%ld\n", + 
SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + RETURN(fh_lookup(rqstp, &resp->fh, 0, MAY_NOP)); +} + +/* + * Set a file's attributes + */ +static int +nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp, + struct nfsd_attrstat *resp) +{ + dprintk("nfsd: SETATTR %x/%ld\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + RETURN(nfsd_setattr(rqstp, &resp->fh, &argp->attrs)); +} + +/* + * Look up a path name component + */ +static int +nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, + struct nfsd_diropres *resp) +{ + int nfserr; + + dprintk("nfsd: LOOKUP %x/%ld %s\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->name); + + nfserr = nfsd_lookup(rqstp, &argp->fh, + argp->name, + argp->len, + &resp->fh); + + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * Read a symlink. + */ +static int +nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd_readlinkres *resp) +{ + u32 *path; + int dummy, nfserr; + + dprintk("nfsd: READLINK %x/%ld\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + /* Reserve room for status and path length */ + svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, 2); + + /* Read the symlink. */ + resp->len = NFS_MAXPATHLEN; + nfserr = nfsd_readlink(rqstp, &argp->fh, (char *) path, &resp->len); + + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * Read a portion of a file. + */ +static int +nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, + struct nfsd_readres *resp) +{ + u32 * buffer; + int nfserr, avail; + + dprintk("nfsd: READ %x/%ld %d bytes at %d\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->count, argp->offset); + + /* Obtain buffer pointer for payload. 19 is 1 word for + * status, 17 words for fattr, and 1 word for the byte count. 
+	 */
+	svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, 19);
+
+	/* svcbuf_reserve reports the remaining room in 32-bit words while
+	 * argp->count is a byte count, hence the shift in both the test
+	 * and the clamp.
+	 */
+	if ((avail << 2) < argp->count) {
+		printk(KERN_NOTICE
+			"oversized read request from %08lx:%d (%d bytes)\n",
+				ntohl(rqstp->rq_addr.sin_addr.s_addr),
+				ntohs(rqstp->rq_addr.sin_port),
+				argp->count);
+		argp->count = avail << 2;
+	}
+
+	/* resp->count goes in as the requested size; nfsd_read gets a
+	 * pointer to it so it can report how much was actually read.
+	 */
+	resp->count = argp->count;
+	nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
+				  argp->offset,
+				  (char *) buffer,
+				  &resp->count);
+
+	RETURN(nfserr);
+}
+
+/*
+ * Write data to a file.
+ * The handle is copied into the reply so the attributes of the written
+ * file can be encoded from resp->fh.
+ */
+static int
+nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
+					struct nfsd_attrstat *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: WRITE %x/%ld %d bytes at %d\n",
+		SVCFH_DEV(&argp->fh),
+		SVCFH_INO(&argp->fh),
+		argp->len, argp->offset);
+
+	nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
+				   argp->offset,
+				   argp->data,
+				   argp->len,
+				   0);
+	RETURN(nfserr);
+}
+
+/*
+ * CREATE processing is complicated. The keyword here is `overloaded.'
+ * There's a small race condition here between the check for existence
+ * and the actual create() call, but one could even consider this a
+ * feature because this only happens if someone else creates the file
+ * at the same time.
+ *
+ * argp->fh is the directory handle, resp->fh receives the handle of
+ * the new (or already existing) file.
+ */
+static int
+nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
+					 struct nfsd_diropres *resp)
+{
+	struct inode	*dirp, *inode = NULL;
+	struct iattr	*attr;
+	svc_fh		*dirfhp, *newfhp = NULL;
+	int		nfserr, type, mode;
+	int		rdonly = 0, exists;
+	dev_t		rdev = NODEV;
+
+	dprintk("nfsd: CREATE %x/%ld %s\n",
+		SVCFH_DEV(&argp->fh),
+		SVCFH_INO(&argp->fh),
+		argp->name);
+
+	dirfhp = &argp->fh;
+	newfhp = &resp->fh;
+	attr = &argp->attrs;
+
+	/* Get the directory inode */
+	nfserr = fh_lookup(rqstp, dirfhp, S_IFDIR, MAY_EXEC);
+	if (nfserr) {
+		/* fh_lookup stores the inode in the handle before its
+		 * type and permission checks, so the handle must be
+		 * released on failure as well.
+		 */
+		fh_put(dirfhp);
+		RETURN(nfserr);
+	}
+	dirp = dirfhp->fh_inode;
+
+	/* Check for MAY_WRITE separately.
*/ + nfserr = nfsd_permission(dirfhp->fh_export, dirp, MAY_WRITE); + if (nfserr == nfserr_rofs) { + rdonly = 1; /* Non-fatal error for echo > /dev/null */ + } else if (nfserr) { + fh_put(dirfhp); + RETURN(nfserr); + } + + /* First, check if the file already exists. */ + exists = !nfsd_lookup(rqstp, dirfhp, argp->name, argp->len, newfhp); + inode = newfhp->fh_inode; + + /* Unfudge the mode bits */ + if (attr->ia_valid & ATTR_MODE) { + type = attr->ia_mode & S_IFMT; + mode = attr->ia_mode & ~S_IFMT; + if (!type) /* HP weirdness */ + type = S_IFREG; + } else if (exists) { + type = inode->i_mode & S_IFMT; + mode = inode->i_mode & ~S_IFMT; + } else { + type = S_IFREG; + mode = 0; /* ??? */ + } + + /* This is for "echo > /dev/null" a la SunOS. Argh. */ + if (rdonly && (!exists || type == S_IFREG)) { + nfserr = nfserr_rofs; + goto done; + } + + attr->ia_valid |= ATTR_MODE; + attr->ia_mode = type | mode; + + /* Special treatment for non-regular files according to the + * gospel of sun micro + */ + nfserr = 0; + if (type != S_IFREG) { + int is_borc = 0; + u32 size = attr->ia_size; + + rdev = (dev_t) size; + if (type != S_IFBLK && type != S_IFCHR) { + rdev = 0; + } else if (type == S_IFCHR && size == ~(u32) 0) { + /* If you think you've seen the worst, grok this. */ + attr->ia_mode = S_IFIFO | mode; + type = S_IFIFO; + } else if (size != rdev) { + /* dev got truncated because of 16bit Linux dev_t */ + nfserr = nfserr_io; /* or nfserr_inval? */ + goto done; + } else { + /* Okay, char or block special */ + is_borc = 1; + } + + /* Make sure the type and device matches */ + if (exists && (type != (inode->i_mode & S_IFMT) + || (is_borc && inode->i_rdev != rdev))) { + nfserr = nfserr_exist; + goto done; + } + } + + if (!exists) { + /* File doesn't exist. Create it and set attrs */ + nfserr = nfsd_create(rqstp, dirfhp, argp->name, argp->len, + attr, type, rdev, newfhp); + } else if (type == S_IFREG) { + /* File already exists. 
We ignore all attributes except
+		 * size, so that creat() behaves exactly like
+		 * open(..., O_CREAT|O_TRUNC|O_WRONLY).
+		 * Masking with ATTR_SIZE keeps only the size request; if
+		 * the client sent none, there is nothing to set.
+		 */
+		attr->ia_valid &= ATTR_SIZE;
+		if (attr->ia_valid)
+			nfserr = nfsd_setattr(rqstp, newfhp, attr);
+	}
+
+done:
+	fh_put(dirfhp);
+	RETURN(nfserr);
+}
+
+/*
+ * Remove a file (anything but a directory) from the directory argp->fh.
+ */
+static int
+nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+					 void *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: REMOVE %x/%ld %s\n",
+		SVCFH_DEV(&argp->fh),
+		SVCFH_INO(&argp->fh),
+		argp->name);
+
+	/* Unlink. -S_IFDIR means file must not be a directory */
+	nfserr = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len);
+	fh_put(&argp->fh);
+	RETURN(nfserr);
+}
+
+/*
+ * Rename fname in directory ffh to tname in directory tfh.
+ * Both directory handles are released before returning.
+ */
+static int
+nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
+					 void *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: RENAME %x/%ld %s -> %x/%ld %s\n",
+		SVCFH_DEV(&argp->ffh),
+		SVCFH_INO(&argp->ffh),
+		argp->fname,
+		SVCFH_DEV(&argp->tfh),
+		SVCFH_INO(&argp->tfh),
+		argp->tname);
+
+	nfserr = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen,
+				    &argp->tfh, argp->tname, argp->tlen);
+	fh_put(&argp->ffh);
+	fh_put(&argp->tfh);
+	RETURN(nfserr);
+}
+
+/*
+ * Create a hard link named tname in directory tfh to the file ffh.
+ * Note the argument order of nfsd_link: target directory and name
+ * first, the existing file last.
+ */
+static int
+nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
+				void *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: LINK %x/%ld -> %x/%ld %s\n",
+		SVCFH_DEV(&argp->ffh),
+		SVCFH_INO(&argp->ffh),
+		SVCFH_DEV(&argp->tfh),
+		SVCFH_INO(&argp->tfh),
+		argp->tname);
+
+	nfserr = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen,
+				  &argp->ffh);
+	fh_put(&argp->ffh);
+	fh_put(&argp->tfh);
+	RETURN(nfserr);
+}
+
+/*
+ * Create the symlink fname in directory ffh, pointing at tname.
+ */
+static int
+nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
+					  void *resp)
+{
+	struct svc_fh	newfh;
+	int		nfserr;
+
+	dprintk("nfsd: SYMLINK %x/%ld %s -> %s\n",
+		SVCFH_DEV(&argp->ffh),
+		SVCFH_INO(&argp->ffh),
+		argp->fname, argp->tname);
+
+	/* newfh lives on the stack and is handed to fh_put on every
+	 * path, so start from an empty handle rather than stack garbage.
+	 */
+	fh_init(&newfh);
+
+	/*
+	 * Create the link, look up new file and set attrs.
+	 */
+	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
+						 argp->tname, argp->tlen,
+						 &newfh);
+	/* Apply the requested attributes only if the link was actually
+	 * created; on failure the creation error is what the client
+	 * must see.
+	 */
+	if (!nfserr)
+		nfserr = nfsd_setattr(rqstp, &newfh, &argp->attrs);
+
+	fh_put(&argp->ffh);
+	fh_put(&newfh);
+	RETURN(nfserr);
+}
+
+/*
+ * Make directory. This operation is not idempotent.
+ * Goes through nfsd_create with type S_IFDIR and device 0; the handle
+ * of the new directory is returned in resp->fh.
+ */
+static int
+nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
+					struct nfsd_diropres *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: MKDIR %x/%ld %s\n",
+		SVCFH_DEV(&argp->fh),
+		SVCFH_INO(&argp->fh),
+		argp->name);
+
+	nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
+				    &argp->attrs, S_IFDIR, 0, &resp->fh);
+	fh_put(&argp->fh);
+	RETURN(nfserr);
+}
+
+/*
+ * Remove a directory.
+ * Same helper as REMOVE, but with a positive S_IFDIR type argument
+ * (REMOVE passes -S_IFDIR to exclude directories).
+ */
+static int
+nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+				 	void *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: RMDIR %x/%ld %s\n",
+		SVCFH_DEV(&argp->fh),
+		SVCFH_INO(&argp->fh),
+		argp->name);
+
+	nfserr = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len);
+	fh_put(&argp->fh);
+	RETURN(nfserr);
+}
+
+/*
+ * Read a portion of a directory.
+ */ +static int +nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp, + struct nfsd_readdirres *resp) +{ + u32 * buffer; + int nfserr, count; + + dprintk("nfsd: READDIR %x/%ld %d bytes at %d\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh), + argp->count, argp->cookie); + + /* Reserve buffer space for status */ + svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, 1); + + /* Make sure we've room for the NULL ptr & eof flag, and shrink to + * client read size */ + if ((count -= 8) > argp->count) + count = argp->count; + + /* Read directory and encode entries on the fly */ + nfserr = nfsd_readdir(rqstp, &argp->fh, (loff_t) argp->cookie, + nfssvc_encode_entry, + buffer, &count); + resp->count = count; + + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * Get file system info + */ +static int +nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd_statfsres *resp) +{ + int nfserr; + + dprintk("nfsd: STATFS %x/%ld\n", + SVCFH_DEV(&argp->fh), + SVCFH_INO(&argp->fh)); + + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + fh_put(&argp->fh); + RETURN(nfserr); +} + +/* + * NFSv2 Server procedures. + * Only the results of non-idempotent operations are cached. 
+ */ +#define nfsd_proc_none NULL +#define nfssvc_release_none NULL +struct nfsd_void { int dummy; }; + +#define PROC(name, argt, rest, relt, cache) \ + { (svc_procfunc) nfsd_proc_##name, \ + (kxdrproc_t) nfssvc_decode_##argt, \ + (kxdrproc_t) nfssvc_encode_##rest, \ + (kxdrproc_t) nfssvc_release_##relt, \ + sizeof(struct nfsd_##argt), \ + sizeof(struct nfsd_##rest), \ + 0, \ + cache \ + } +struct svc_procedure nfsd_procedures2[18] = { + PROC(null, void, void, none, RC_NOCACHE), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE), + PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF), + PROC(none, void, void, none, RC_NOCACHE), + PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE), + PROC(readlink, fhandle, readlinkres, none, RC_NOCACHE), + PROC(read, readargs, readres, fhandle, RC_NOCACHE), + PROC(none, void, void, none, RC_NOCACHE), + PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF), + PROC(create, createargs, diropres, fhandle, RC_REPLBUFF), + PROC(remove, diropargs, void, none, RC_REPLSTAT), + PROC(rename, renameargs, void, none, RC_REPLSTAT), + PROC(link, linkargs, void, none, RC_REPLSTAT), + PROC(symlink, symlinkargs, void, none, RC_REPLSTAT), + PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF), + PROC(rmdir, diropargs, void, none, RC_REPLSTAT), + PROC(readdir, readdirargs, readdirres, none, RC_REPLSTAT), + PROC(statfs, fhandle, statfsres, none, RC_NOCACHE), +}; + + +/* + * Map errnos to NFS errnos. 
+ */ +int +nfserrno (int errno) +{ + static struct { + int nfserr; + int syserr; + } nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, EPERM }, + { NFSERR_NOENT, ENOENT }, + { NFSERR_IO, EIO }, + { NFSERR_NXIO, ENXIO }, + { NFSERR_ACCES, EACCES }, + { NFSERR_EXIST, EEXIST }, + { NFSERR_NODEV, ENODEV }, + { NFSERR_NOTDIR, ENOTDIR }, + { NFSERR_ISDIR, EISDIR }, + { NFSERR_INVAL, EINVAL }, + { NFSERR_FBIG, EFBIG }, + { NFSERR_NOSPC, ENOSPC }, + { NFSERR_ROFS, EROFS }, + { NFSERR_NAMETOOLONG, ENAMETOOLONG }, + { NFSERR_NOTEMPTY, ENOTEMPTY }, +#ifdef EDQUOT + { NFSERR_DQUOT, EDQUOT }, +#endif + { NFSERR_STALE, ESTALE }, + { NFSERR_WFLUSH, EIO }, + { -1, EIO } + }; + int i; + + for (i = 0; nfs_errtbl[i].nfserr != -1; i++) { + if (nfs_errtbl[i].syserr == errno) + return htonl (nfs_errtbl[i].nfserr); + } + printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + +#if 0 +static void +nfsd_dump(char *tag, u32 *buf, int len) +{ + int i; + + printk(KERN_NOTICE + "nfsd: %s (%d words)\n", tag, len); + + for (i = 0; i < len && i < 32; i += 8) + printk(KERN_NOTICE + " %08lx %08lx %08lx %08lx" + " %08lx %08lx %08lx %08lx\n", + buf[i], buf[i+1], buf[i+2], buf[i+3], + buf[i+4], buf[i+5], buf[i+6], buf[i+7]); +} +#endif diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c new file mode 100644 index 000000000..84d296834 --- /dev/null +++ b/fs/nfsd/nfssvc.c @@ -0,0 +1,255 @@ +/* + * linux/fs/nfsd/nfssvc.c + * + * Central processing for nfsd. 
+ * + * Authors: Olaf Kirch (okir@monad.swb.de) + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + */ + +#define __NO_VERSION__ +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/nfs.h> +#include <linux/in.h> +#include <linux/uio.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <linux/malloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> + +#include <linux/sunrpc/types.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svcsock.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/stats.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/lockd/bind.h> + +#define NFSDDBG_FACILITY NFSDDBG_SVC +#define NFSD_BUFSIZE (1024 + NFSSVC_MAXBLKSIZE) +#define BLOCKABLE_SIGS (~(_S(SIGKILL) | _S(SIGSTOP))) +#define SHUTDOWN_SIGS (_S(SIGKILL)|_S(SIGINT)|_S(SIGTERM)) +#define _S(sig) (1 << ((sig) - 1)) + +extern struct svc_program nfsd_program; +static void nfsd(struct svc_rqst *rqstp); +struct timeval nfssvc_boot = { 0, 0 }; + +/* + * Make a socket for nfsd + */ +static int +nfsd_makesock(struct svc_serv *serv, int protocol, unsigned short port) +{ + struct sockaddr_in sin; + + dprintk("nfsd: creating socket proto = %d\n", protocol); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = INADDR_ANY; + sin.sin_port = htons(port); + return svc_create_socket(serv, protocol, &sin); +} + +int +nfsd_svc(unsigned short port, int nrservs) +{ + struct svc_serv * serv; + int error; + + dprintk("nfsd: creating service\n"); + if (nrservs < 0) + return -EINVAL; + if (nrservs > NFSD_MAXSERVS) + nrservs = NFSD_MAXSERVS; + + serv = svc_create(&nfsd_program, NFSD_BUFSIZE, NFSSVC_XDRSIZE); + if (serv == NULL) + return -ENOMEM; + + if ((error = nfsd_makesock(serv, IPPROTO_UDP, port)) < 0 + || (error = nfsd_makesock(serv, IPPROTO_TCP, port)) < 0) + goto failure; + + while (nrservs--) { + error = 
svc_create_thread(nfsd, serv); + if (error < 0) + break; + } + +failure: + svc_destroy(serv); /* Release server */ + return error; +} + +/* + * This is the NFS server kernel thread + */ +static void +nfsd(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + sigset_t oldsigmask; + int oldumask, err; + + lock_kernel(); + /* Lock module and set up kernel thread */ + MOD_INC_USE_COUNT; + exit_mm(current); + current->session = 1; + current->pgrp = 1; + sprintf(current->comm, "nfsd"); + + oldumask = current->fs->umask; /* Set umask to 0. */ + current->blocked |= ~SHUTDOWN_SIGS; + current->fs->umask = 0; + nfssvc_boot = xtime; /* record boot time */ + lockd_up(); /* start lockd */ + + /* + * The main request loop + */ + for (;;) { + /* + * Find a socket with data available and call its + * recvfrom routine. + */ + while ((err = svc_recv(serv, rqstp)) == -EAGAIN) + ; + if (err < 0) + break; + + /* Lock the export hash tables for reading. */ + exp_readlock(); + + /* Validate the client's address. This will also defeat + * port probes on port 2049 by unauthorized clients. + */ + rqstp->rq_client = exp_getclient(&rqstp->rq_addr); + if (!rqstp->rq_client) { + printk(KERN_WARNING "nfsd: unauthenticated request " + "from (%08lx:%d)\n", + ntohl(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port)); + svc_drop(rqstp); + serv->sv_stats->rpcbadclnt++; + } else { + /* Process request with all signals blocked. 
*/
+			oldsigmask = current->blocked;
+			current->blocked = BLOCKABLE_SIGS;
+			svc_process(serv, rqstp);
+			current->blocked = oldsigmask;
+		}
+
+		/* Unlock export hash tables */
+		exp_unlock();
+	}
+
+	if (err != -EINTR) {
+		printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
+	} else {
+		unsigned int	signo;
+
+		/* Report the signal that stopped us. Bit (signo - 1) of
+		 * the masks stands for signal signo, matching _S() above,
+		 * and only a pending signal that is not blocked can be
+		 * the one that was delivered.
+		 */
+		for (signo = 1; signo <= 32; signo++)
+			if (current->signal & ~current->blocked
+			    & (1UL << (signo - 1)))
+				break;
+		printk(KERN_WARNING "nfsd: terminating on signal %d\n", signo);
+	}
+
+	/* Release lockd */
+	lockd_down();
+
+	/* Destroy the thread */
+	svc_exit_thread(rqstp);
+	current->fs->umask = oldumask;
+
+	/* Release module */
+	MOD_DEC_USE_COUNT;
+}
+
+/*
+ * Per-request dispatcher installed in the nfsd version tables.
+ * Consults the reply cache, decodes the arguments, runs the procedure
+ * handler and encodes the result. Returns 0 when the cache lookup
+ * yields RC_INTR or RC_DROPIT, 1 in every other case; RPC-level
+ * failures are reported through *statp.
+ */
+static int
+nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
+{
+	struct svc_procedure	*proc;
+	kxdrproc_t		xdr;
+	u32			nfserr;
+
+	dprintk("nfsd_dispatch: proc %d\n", rqstp->rq_proc);
+	proc = rqstp->rq_procinfo;
+
+	/* Check whether we have this call in the cache. */
+	switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) {
+	case RC_INTR:
+	case RC_DROPIT:
+		return 0;
+	case RC_REPLY:
+		return 1;
+	case RC_DOIT:
+		/* do it */
+		break;
+	}
+
+	/* Decode arguments */
+	xdr = proc->pc_decode;
+	if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp)) {
+		dprintk("nfsd: failed to decode arguments!\n");
+		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+		*statp = rpc_garbage_args;
+		return 1;
+	}
+
+	/* Now call the procedure handler, and encode NFS status.
+	 * Procedure 0 (NULL) carries no status word in its reply.
+	 */
+	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+	if (rqstp->rq_proc != 0)
+		svc_putlong(&rqstp->rq_resbuf, nfserr);
+
+	/* Encode result.
+	 * FIXME: Most NFSv3 calls return wcc data even when the call failed
+	 */
+	xdr = proc->pc_encode;
+	if (!nfserr && xdr
+	 && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
+		/* Failed to encode result. Release cache entry */
+		dprintk("nfsd: failed to encode result!\n");
+		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+		*statp = rpc_system_err;
+		return 1;
+	}
+
+	/* Store reply in cache.
*/ + nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + return 1; +} + +static struct svc_version nfsd_version2 = { + 2, 18, nfsd_procedures2, nfsd_dispatch +}; +#ifdef CONFIG_NFSD_NFS3 +static struct svc_version nfsd_version3 = { + 3, 23, nfsd_procedures3, nfsd_dispatch +}; +#endif +static struct svc_version * nfsd_version[] = { + NULL, + NULL, + &nfsd_version2, +#ifdef CONFIG_NFSD_NFS3 + &nfsd_version3, +#endif +}; + +#define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0])) +struct svc_program nfsd_program = { + NFS_PROGRAM, /* program number */ + 2, NFSD_NRVERS-1, /* version range */ + NFSD_NRVERS, /* nr of entries in nfsd_version */ + nfsd_version, /* version table */ + "nfsd", /* program name */ + &nfsd_svcstats, /* version table */ +}; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c new file mode 100644 index 000000000..34b9ff3f2 --- /dev/null +++ b/fs/nfsd/nfsxdr.c @@ -0,0 +1,470 @@ +/* + * linux/fs/nfsd/xdr.c + * + * XDR support for nfsd + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/nfs.h> + +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/xdr.h> + +#define NFSDDBG_FACILITY NFSDDBG_XDR + +u32 nfs_ok, nfserr_perm, nfserr_noent, nfserr_io, nfserr_nxio, + nfserr_acces, nfserr_exist, nfserr_nodev, nfserr_notdir, + nfserr_isdir, nfserr_fbig, nfserr_nospc, nfserr_rofs, + nfserr_nametoolong, nfserr_dquot, nfserr_stale; + +#ifdef NFSD_OPTIMIZE_SPACE +# define inline +#endif + +/* + * Mapping of S_IF* types to NFS file types + */ +static u32 nfs_ftypes[] = { + NFNON, NFFIFO, NFCHR, NFBAD, + NFDIR, NFBAD, NFBLK, NFBAD, + NFREG, NFBAD, NFLNK, NFBAD, + NFSOCK, NFBAD, NFLNK, NFBAD, +}; + +/* + * Initialization of NFS status variables + */ +void +nfsd_xdr_init(void) +{ + static int inited = 0; + + if (inited) + return; + + nfs_ok = htonl(NFS_OK); + nfserr_perm = htonl(NFSERR_PERM); + nfserr_noent 
= htonl(NFSERR_NOENT); + nfserr_io = htonl(NFSERR_IO); + nfserr_nxio = htonl(NFSERR_NXIO); + nfserr_acces = htonl(NFSERR_ACCES); + nfserr_exist = htonl(NFSERR_EXIST); + nfserr_nodev = htonl(NFSERR_NODEV); + nfserr_notdir = htonl(NFSERR_NOTDIR); + nfserr_isdir = htonl(NFSERR_ISDIR); + nfserr_fbig = htonl(NFSERR_FBIG); + nfserr_nospc = htonl(NFSERR_NOSPC); + nfserr_rofs = htonl(NFSERR_ROFS); + nfserr_nametoolong = htonl(NFSERR_NAMETOOLONG); + nfserr_dquot = htonl(NFSERR_DQUOT); + nfserr_stale = htonl(NFSERR_STALE); + + inited = 1; +} + +/* + * XDR functions for basic NFS types + */ +static inline u32 * +decode_fh(u32 *p, struct svc_fh *fhp) +{ + fh_init(fhp); + memcpy(&fhp->fh_handle, p, sizeof(struct knfs_fh)); + + /* FIXME: Look up export pointer here and verify + * Sun Secure RPC if requested */ + return p + (sizeof(struct knfs_fh) >> 2); +} + +static inline u32 * +encode_fh(u32 *p, struct svc_fh *fhp) +{ + memcpy(p, &fhp->fh_handle, sizeof(struct knfs_fh)); + return p + (sizeof(struct knfs_fh) >> 2); +} + +/* + * Decode a file name and make sure that the path contains + * no slashes or null bytes. + */ +static inline u32 * +decode_filename(u32 *p, char **namp, int *lenp) +{ + char *name; + int i; + + if ((p = xdr_decode_string(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0' || *name == '/') + return NULL; + } + *name = '\0'; + } + + return p; +} + +static inline u32 * +decode_pathname(u32 *p, char **namp, int *lenp) +{ + char *name; + int i; + + if ((p = xdr_decode_string(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0') + return NULL; + } + *name = '\0'; + } + + return p; +} + +static inline u32 * +decode_sattr(u32 *p, struct iattr *iap) +{ + u32 tmp, tmp1; + + iap->ia_valid = 0; + + /* Sun client bug compatibility check: some sun clients seem to + * put 0xffff in the mode field when they mean 0xffffffff. 
+ * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah. + */ + if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_UID; + iap->ia_uid = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_GID; + iap->ia_gid = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_SIZE; + iap->ia_size = tmp; + } + tmp = ntohl(*p++); tmp1 = ntohl(*p++); + if (tmp != (u32)-1 && tmp1 != (u32)-1) { + iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + iap->ia_atime = tmp; + } + tmp = ntohl(*p++); tmp1 = ntohl(*p++); + if (tmp != (u32)-1 && tmp1 != (u32)-1) { + iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; + iap->ia_mtime = tmp; + } + return p; +} + +static inline u32 * +encode_fattr(struct svc_rqst *rqstp, u32 *p, struct inode *inode) +{ + if (!inode) + return 0; + *p++ = htonl(nfs_ftypes[(inode->i_mode & S_IFMT) >> 12]); + *p++ = htonl((u32) inode->i_mode); + *p++ = htonl((u32) inode->i_nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, inode->i_uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, inode->i_gid)); + if (S_ISLNK(inode->i_mode) && inode->i_size > NFS_MAXPATHLEN) { + *p++ = htonl(NFS_MAXPATHLEN); + } else { + *p++ = htonl((u32) inode->i_size); + } + *p++ = htonl((u32) inode->i_blksize); + *p++ = htonl((u32) inode->i_rdev); + *p++ = htonl((u32) inode->i_blocks); + *p++ = htonl((u32) inode->i_dev); + *p++ = htonl((u32) inode->i_ino); + *p++ = htonl((u32) inode->i_atime); + *p++ = 0; + *p++ = htonl((u32) inode->i_mtime); + *p++ = 0; + *p++ = htonl((u32) inode->i_ctime); + *p++ = 0; + + return p; +} + +/* + * Check buffer bounds after decoding arguments + */ +static inline int +xdr_argsize_check(struct svc_rqst *rqstp, u32 *p) +{ + struct svc_buf *buf = &rqstp->rq_argbuf; + + return p - buf->base <= buf->buflen; +} + +static inline int +xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) +{ + struct svc_buf *buf = 
&rqstp->rq_resbuf; + + buf->len = p - buf->base; + dprintk("nfsd: ressize_check p %p base %p len %d\n", + p, buf->base, buf->buflen); + return (buf->len <= buf->buflen); +} + +/* + * XDR decode functions + */ +int +nfssvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + if (!(p = decode_fh(p, fhp))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_sattrargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_sattr(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_diropargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + + args->offset = ntohl(*p++); + args->count = ntohl(*p++); + args->totalsize = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_writeargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + + p++; /* beginoffset */ + args->offset = ntohl(*p++); /* offset */ + p++; /* totalcount */ + args->len = ntohl(*p++); + args->data = (char *) p; + p += XDR_QUADLEN(args->len); + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len)) + || !(p = decode_sattr(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, + struct 
nfsd_renameargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_linkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_symlinkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_pathname(p, &args->tname, &args->tlen)) + || !(p = decode_sattr(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readdirargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + args->cookie = ntohl(*p++); + args->count = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ +int +nfssvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, + struct nfsd_attrstat *resp) +{ + if (!(p = encode_fattr(rqstp, p, resp->fh.fh_inode))) + return 0; + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_diropres *resp) +{ + if (!(p = encode_fh(p, &resp->fh)) + || !(p = encode_fattr(rqstp, p, resp->fh.fh_inode))) + return 0; + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readlinkres *resp) +{ + *p++ = htonl(resp->len); + p += XDR_QUADLEN(resp->len); + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_readres(struct 
svc_rqst *rqstp, u32 *p, + struct nfsd_readres *resp) +{ + if (!(p = encode_fattr(rqstp, p, resp->fh.fh_inode))) + return 0; + *p++ = htonl(resp->count); + p += XDR_QUADLEN(resp->count); + + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readdirres *resp) +{ + p += XDR_QUADLEN(resp->count); + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_statfsres *resp) +{ + struct statfs *stat = &resp->stats; + + *p++ = htonl(8 * 1024); /* max transfer size */ + *p++ = htonl(stat->f_bsize); + *p++ = htonl(stat->f_blocks); + *p++ = htonl(stat->f_bfree); + *p++ = htonl(stat->f_bavail); + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_entry(struct readdir_cd *cd, const char *name, + int namlen, off_t offset, ino_t ino) +{ + u32 *p = cd->buffer; + int buflen, slen; + + /* + dprintk("nfsd: entry(%.*s off %ld ino %ld)\n", + namlen, name, offset, ino); + */ + + if (offset > ~((u32) 0)) + return -EINVAL; + if (cd->offset) + *cd->offset = htonl(offset); + if (namlen > NFS2_MAXNAMLEN) + namlen = NFS2_MAXNAMLEN;/* truncate filename */ + + slen = XDR_QUADLEN(namlen); + if ((buflen = cd->buflen - slen - 4) < 0) { + cd->eob = 1; + return -EINVAL; + } + *p++ = xdr_one; /* mark entry present */ + *p++ = htonl((u32) ino); /* file id */ + *p++ = htonl((u32) namlen); /* name length & name */ + memcpy(p, name, namlen); + p += slen; + cd->offset = p; /* remember pointer */ + *p++ = ~(u32) 0; /* offset of next entry */ + + cd->buflen = buflen; + cd->buffer = p; + return 0; +} + +/* + * XDR release functions + */ +int +nfssvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *resp) +{ + fh_put(&resp->fh); + return 1; +} diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c new file mode 100644 index 000000000..ee76fcf6d --- /dev/null +++ b/fs/nfsd/stats.c @@ -0,0 +1,82 @@ +/* + * linux/fs/nfsd/stats.c + * + * procfs-based user access to 
knfsd statistics + * + * /proc/net/rpc/nfsd + * + * Format: + * rc <hits> <misses> <nocache> + * Statistsics for the reply cache + * plus generic RPC stats (see net/sunrpc/stats.c) + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> + +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/stats.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/stats.h> + +struct nfsd_stats nfsdstats; +struct svc_stat nfsd_svcstats = { &nfsd_program, }; + +static int +nfsd_proc_read(char *buffer, char **start, off_t offset, int count, + int *eof, void *data) +{ + int len; + + len = sprintf(buffer, + "rc %d %d %d\n", + nfsdstats.rchits, + nfsdstats.rcmisses, + nfsdstats.rcnocache); + + /* Assume we haven't hit EOF yet. Will be set by svc_proc_read. */ + *eof = 0; + + /* + * Append generic nfsd RPC statistics if there's room for it. + */ + if (len <= offset) { + len = svc_proc_read(buffer, start, offset - len, count, + eof, data); + return len; + } + + if (len < count) { + len += svc_proc_read(buffer + len, start, 0, count - len, + eof, data); + } + + if (offset >= len) { + *start = buffer; + return 0; + } + + *start = buffer + offset; + if ((len -= offset) > count) + return count; + return len; +} + +void +nfsd_stat_init(void) +{ + struct proc_dir_entry *ent; + + if ((ent = svc_proc_register(&nfsd_svcstats)) != 0) + ent->read_proc = nfsd_proc_read; +} + +void +nfsd_stat_shutdown(void) +{ + svc_proc_unregister("nfsd"); +} diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c new file mode 100644 index 000000000..ecec8a22b --- /dev/null +++ b/fs/nfsd/vfs.c @@ -0,0 +1,1094 @@ +/* + * linux/fs/nfsd/vfs.c + * + * File operations used by nfsd. Some of these have been ripped from + * other parts of the kernel because they weren't in ksyms.c, others + * are partial duplicates with added or changed functionality. 
+ * + * Note that several functions lock the inode upon which they want + * to act, most notably those that create directory entries. The + * unlock operation can take place either by calling fh_unlock within + * the function directly, or at a later time in fh_put(). So if you + * notice code paths that apparently fail to unlock the inode, don't + * worry--they have been taken care of. + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/version.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/fs.h> +#include <linux/major.h> +#include <linux/ext2_fs.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/unistd.h> +#include <linux/malloc.h> +#include <linux/in.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> + +#if LINUX_VERSION_CODE >= 0x020100 +#include <asm/uaccess.h> +#endif + +#define NFSDDBG_FACILITY NFSDDBG_FILEOP + +/* Symbol not exported */ +static struct super_block *get_super(dev_t dev); + +/* Open mode for nfsd_open */ +#define OPEN_READ 0 +#define OPEN_WRITE 1 + +/* Hack until we have a macro check for mandatory locks. */ +#ifndef IS_ISMNDLK +#define IS_ISMNDLK(i) (((i)->i_mode & (S_ISGID|S_ISVTX)) == S_ISGID) +#endif + +/* Check for dir entries '.' and '..' */ +#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) + +/* + * This is a cache of readahead params that help us choose the proper + * readahead strategy. Initially, we set all readahead parameters to 0 + * and let the VFS handle things. + * If you increase the number of cached files very much, you'll need to + * add a hash table here. 
+ */ +struct raparms { + struct raparms * p_next; + unsigned int p_count; + dev_t p_dev; + ino_t p_ino; + unsigned long p_reada, + p_ramax, + p_raend, + p_ralen, + p_rawin; +}; + +#define FILECACHE_MAX (2 * NFSD_MAXSERVS) +static struct raparms raparms[FILECACHE_MAX]; +static struct raparms * raparm_cache = 0; + +/* + * Deny access to certain file systems + */ +static inline int +fs_off_limits(struct super_block *sb) +{ + return !sb || sb->s_magic == NFS_SUPER_MAGIC + || sb->s_magic == PROC_SUPER_MAGIC; +} + +/* + * Check whether directory is a mount point + */ +static inline int +nfsd_iscovered(struct inode *inode) +{ + return inode->i_mount != NULL; +} + +/* + * Look up one component of a pathname. + */ +int +nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, + int len, struct svc_fh *resfh) +{ + struct svc_export *exp; + struct super_block *sb; + struct inode *dirp, *inode; + int perm, err, dotdot = 0; + + dprintk("nfsd: nfsd_lookup(fh %x/%ld, %s)\n", + SVCFH_DEV(fhp), SVCFH_INO(fhp), name); + + /* Obtain inode and export */ + if ((err = fh_lookup(rqstp, fhp, S_IFDIR, MAY_NOP)) != 0) + return err; + dirp = fhp->fh_inode; + exp = fhp->fh_export; + + /* check permissions before traversing mount-points */ + perm = nfsd_permission(exp, dirp, MAY_EXEC); + + dotdot = (len == 2 && name[0] == '.' 
&& name[1] == '.'); + if (dotdot) { + if (dirp == current->fs->root) { + dirp->i_count++; + *resfh = *fhp; + return 0; + } + + if (dirp->i_dev == exp->ex_dev && dirp->i_ino == exp->ex_ino) { + dirp->i_count++; + *resfh = *fhp; + return 0; + } + } else if (len == 1 && name[0] == '.') { + len = 0; + } else if (fs_off_limits(dirp->i_sb)) { + /* No lookups on NFS mounts and procfs */ + return nfserr_noent; + } else if (nfsd_iscovered(dirp)) { + /* broken NFS client */ + return nfserr_acces; + } + if (!dirp->i_op || !dirp->i_op->lookup) + return nfserr_notdir; + if (perm != 0) + return perm; + if (!len) { + dirp->i_count++; + *resfh = *fhp; + return 0; + } + + dirp->i_count++; /* lookup eats the dirp inode */ + err = dirp->i_op->lookup(dirp, name, len, &inode); + + if (err) + return nfserrno(-err); + + /* Note that lookup() has already done a call to iget() so that + * the inode returned never refers to an inode covered by a mount. + * When this has happened, return the covered inode. + */ + if (!dotdot && (sb = inode->i_sb) && (inode == sb->s_mounted)) { + iput(inode); + inode = sb->s_covered; + inode->i_count++; + } + + fh_compose(resfh, exp, inode); + return 0; +} + +/* + * Set various file attributes. + */ +int +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) +{ + struct inode *inode; + int accmode = MAY_SATTR; + int ftype = 0; + int imode; + int err; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= MAY_WRITE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + if ((err = fh_lookup(rqstp, fhp, ftype, accmode)) != 0) + return err; + + fh_lock(fhp); /* lock inode */ + inode = fhp->fh_inode; + + /* The size case is special... 
*/ + if ((iap->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) { + if (iap->ia_size < inode->i_size) { + err = nfsd_permission(fhp->fh_export, inode, MAY_TRUNC); + if (err != 0) + return err; + } + if ((err = get_write_access(inode)) != 0) + return nfserrno(-err); + inode->i_size = iap->ia_size; + if (inode->i_op && inode->i_op->truncate) + inode->i_op->truncate(inode); + inode->i_dirt = 1; + put_write_access(inode); + iap->ia_valid &= ATTR_SIZE; + iap->ia_valid |= ATTR_MTIME; + iap->ia_mtime = CURRENT_TIME; + } + + imode = inode->i_mode; + if (iap->ia_valid & ATTR_MODE) { + iap->ia_mode &= S_IALLUGO; + imode = iap->ia_mode |= (imode & ~S_IALLUGO); + } + + /* Revoke setuid/setgid bit on chown/chgrp */ + if ((iap->ia_valid & ATTR_UID) && (imode & S_ISUID) + && iap->ia_uid != inode->i_uid) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = imode &= ~S_ISUID; + } + if ((iap->ia_valid & ATTR_GID) && (imode & S_ISGID) + && iap->ia_gid != inode->i_gid) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = imode &= ~S_ISGID; + } + + /* Change the attributes. */ + if (iap->ia_valid) { + iap->ia_valid |= ATTR_CTIME; + iap->ia_ctime = CURRENT_TIME; + err = nfsd_notify_change(inode, iap); + if (err) + return nfserrno(-err); + if (EX_ISSYNC(fhp->fh_export)) + nfsd_write_inode(inode); + } + + return 0; +} + +/* + * Open an existing file or directory. + * The wflag argument indicates write access. + */ +int +nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, + int wflag, struct file *filp) +{ + struct inode *inode; + int access, err; + + access = wflag? 
MAY_WRITE : MAY_READ; + if ((err = fh_lookup(rqstp, fhp, type, access)) != 0) + return err; + inode = fhp->fh_inode; + + /* Disallow access to files with the append-only bit set or + * with mandatory locking enabled */ + if (IS_APPEND(inode) || IS_ISMNDLK(inode)) + return nfserr_perm; + if (!inode->i_op || !inode->i_op->default_file_ops) + return nfserr_perm; + + if (wflag && (err = get_write_access(inode)) != 0) + return nfserrno(-err); + + memset(filp, 0, sizeof(*filp)); + filp->f_op = inode->i_op->default_file_ops; + filp->f_count = 1; + filp->f_flags = wflag? O_WRONLY : O_RDONLY; + filp->f_mode = wflag? FMODE_WRITE : FMODE_READ; + filp->f_inode = inode; + + if (filp->f_op->open) { + err = filp->f_op->open(inode, filp); + if (err) { + if (wflag) + put_write_access(inode); + filp->f_count--; + return nfserrno(-err); + } + } + + inode->i_count++; + return 0; +} + +/* + * Close a file. + */ +void +nfsd_close(struct file *filp) +{ + struct inode *inode; + + inode = filp->f_inode; + if (!inode->i_count) + printk(KERN_WARNING "nfsd: inode count == 0!\n"); + if (filp->f_op && filp->f_op->release) + filp->f_op->release(inode, filp); + if (filp->f_mode & FMODE_WRITE) + put_write_access(inode); + iput(inode); +} + +/* + * Sync a file + */ +void +nfsd_sync(struct inode *inode, struct file *filp) +{ + filp->f_op->fsync(inode, filp); +} + +/* + * Obtain the readahead parameters for the given file + */ +static inline struct raparms * +nfsd_get_raparms(dev_t dev, ino_t ino) +{ + struct raparms *ra, **rap, **frap = NULL; + + for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { + if (ra->p_dev != dev || ra->p_ino != ino) { + if (ra->p_count == 0) + frap = rap; + } else + goto found; + } + if (!frap) + return NULL; + rap = frap; + ra = *frap; + memset(ra, 0, sizeof(*ra)); +found: + if (rap != &raparm_cache) { + *rap = ra->p_next; + ra->p_next = raparm_cache; + raparm_cache = ra; + } + ra->p_count++; + return ra; +} + +/* + * Read data from a file. 
count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + */ +int +nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, char *buf, + unsigned long *count) +{ + struct raparms *ra; + struct inode *inode; + struct file file; + unsigned long oldfs; + int err; + + if ((err = nfsd_open(rqstp, fhp, S_IFREG, OPEN_READ, &file)) != 0) + return err; + inode = file.f_inode; + if (!file.f_op->read) { + nfsd_close(&file); + return nfserr_perm; + } + + /* Get readahead parameters */ + if ((ra = nfsd_get_raparms(inode->i_dev, inode->i_ino)) != NULL) { + file.f_reada = ra->p_reada; + file.f_ramax = ra->p_ramax; + file.f_raend = ra->p_raend; + file.f_ralen = ra->p_ralen; + file.f_rawin = ra->p_rawin; + } + file.f_pos = offset; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = file.f_op->read(file.f_inode, &file, buf, *count); + set_fs(oldfs); + + /* Write back readahead params */ + if (ra != NULL) { + dprintk("nfsd: raparms %ld %ld %ld %ld %ld\n", + file.f_reada, file.f_ramax, file.f_raend, + file.f_ralen, file.f_rawin); + ra->p_reada = file.f_reada; + ra->p_ramax = file.f_ramax; + ra->p_raend = file.f_raend; + ra->p_ralen = file.f_ralen; + ra->p_rawin = file.f_rawin; + ra->p_count -= 1; + } + + nfsd_close(&file); + + if (err < 0) + return nfserrno(-err); + *count = err; + return 0; +} + +/* + * Write data to a file. + * The stable flag requests synchronous writes. 
+ */ +int +nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, + char *buf, unsigned long cnt, int stable) +{ + struct svc_export *exp; + struct file file; + struct inode *inode; + unsigned long oldfs; + int err; + + if (!cnt) + return 0; + if ((err = nfsd_open(rqstp, fhp, S_IFREG, OPEN_WRITE, &file)) != 0) + return err; + if (!file.f_op->write) { + nfsd_close(&file); + return nfserr_perm; + } + + inode = fhp->fh_inode; + exp = fhp->fh_export; + + /* + * Request sync writes if + * - the sync export option has been set, or + * - the client requested O_SYNC behavior (NFSv3 feature). + * When gathered writes have been configured for this volume, + * flushing the data to disk is handled separately below. + */ + if ((stable || (stable = EX_ISSYNC(exp))) && !EX_WGATHER(exp)) + file.f_flags |= O_SYNC; + + fh_lock(fhp); /* lock inode */ + file.f_pos = offset; /* set write offset */ + + /* Write the data. */ + oldfs = get_fs(); set_fs(KERNEL_DS); + err = file.f_op->write(inode, &file, buf, cnt); + set_fs(oldfs); + + /* clear setuid/setgid flag after write */ + if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) { + struct iattr ia; + + ia.ia_valid = ATTR_MODE; + ia.ia_mode = inode->i_mode & ~(S_ISUID | S_ISGID); + nfsd_notify_change(inode, &ia); + } + + fh_unlock(fhp); /* unlock inode */ + + if (err >= 0 && stable) { + static unsigned long last_ino = 0; + static kdev_t last_dev = NODEV; + + /* + * Gathered writes: If another process is currently + * writing to the file, there's a high chance + * this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the + * file with each write request, we sleep for 10 msec. + * + * I don't know if this roughly approximates + * C. 
Juszak's idea of gathered writes, but it's a + * nice and simple solution (IMHO), and it seems to + * work:-) + */ + if (EX_WGATHER(exp) && (inode->i_writecount > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_dev))) { +#if 0 + current->timeout = jiffies + 10 * HZ / 1000; + interruptible_sleep_on(&inode->i_wait); +#else + dprintk("nfsd: write defer %d\n", current->pid); + need_resched = 1; + current->timeout = jiffies + HZ / 100; + schedule(); + dprintk("nfsd: write resume %d\n", current->pid); +#endif + } + + if (inode->i_dirt) { + dprintk("nfsd: write sync %d\n", current->pid); + nfsd_sync(inode, &file); + nfsd_write_inode(inode); + } + wake_up(&inode->i_wait); + last_ino = inode->i_ino; + last_dev = inode->i_dev; + } + + nfsd_close(&file); + + dprintk("nfsd: write complete\n"); + return (err < 0)? nfserrno(-err) : 0; +} + +/* + * Create a file (regular, directory, device, fifo). + * UNIX sockets not yet implemented. + */ +int +nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, struct iattr *iap, + int type, dev_t rdev, struct svc_fh *resfhp) +{ + struct inode *dirp, *inode = NULL; + int err; + + if (!flen) + return nfserr_perm; + + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + + if ((err = fh_lookup(rqstp, fhp, S_IFDIR, MAY_CREATE)) != 0) + return err; + + fh_lock(fhp); /* lock directory */ + dirp = fhp->fh_inode; + dirp->i_count++; /* dirop eats the inode */ + + switch (type) { + case S_IFREG: + if (!dirp->i_op || !dirp->i_op->create) + return nfserr_perm; + err = dirp->i_op->create(dirp, fname, flen, + iap->ia_mode, &inode); + break; + case S_IFDIR: + if (!dirp->i_op || !dirp->i_op->mkdir) + return nfserr_perm; + err = dirp->i_op->mkdir(dirp, fname, flen, iap->ia_mode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + if (!dirp->i_op || !dirp->i_op->mknod) + return nfserr_perm; + err = dirp->i_op->mknod(dirp, fname, flen, iap->ia_mode, rdev); + break; + default: + iput(dirp); + err = -EACCES; + } + + 
fh_unlock(fhp); + + if (err < 0) + return nfserrno(-err); + + /* + * If the VFS call doesn't return the inode, look it up now. + */ + if (inode == NULL) { + dirp->i_count++; + err = dirp->i_op->lookup(dirp, fname, flen, &inode); + if (err < 0) + return -nfserrno(err); /* Huh?! */ + } + + if (EX_ISSYNC(fhp->fh_export)) + nfsd_write_inode(dirp); + + /* Assemble the file handle for the newly created file */ + fh_compose(resfhp, fhp->fh_export, inode); + + /* Set file attributes. Mode has already been set and + * setting uid/gid works only for root. Irix appears to + * send along the gid when it tries to implement setgid + * directories via NFS. + */ + if ((iap->ia_valid &= (ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { + if ((err = nfsd_setattr(rqstp, resfhp, iap)) != 0) { + fh_put(resfhp); + return err; + } + } + + return 0; +} + +/* + * Truncate a file. + * The calling routines must make sure to update the ctime + * field and call notify_change. + */ +int +nfsd_truncate(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long size) +{ + struct inode *inode; + int err; + + if ((err = fh_lookup(rqstp, fhp, S_IFREG, MAY_WRITE|MAY_TRUNC)) != 0) + return err; + + fh_lock(fhp); /* lock inode if not yet locked */ + inode = fhp->fh_inode; + + if ((err = get_write_access(inode)) != 0) + return nfserrno(-err); + inode->i_size = size; + if (inode->i_op && inode->i_op->truncate) + inode->i_op->truncate(inode); + inode->i_dirt = 1; + put_write_access(inode); + + fh_unlock(fhp); + + return 0; +} + +/* + * Read a symlink. On entry, *lenp must contain the maximum path length that + * fits into the buffer. On return, it contains the true length. 
+ */ +int +nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) +{ + struct inode *inode; + unsigned long oldfs; + int err; + + if ((err = fh_lookup(rqstp, fhp, S_IFLNK, MAY_READ)) != 0) + return err; + inode = fhp->fh_inode; + + if (!inode->i_op || !inode->i_op->readlink) + return nfserr_io; + + inode->i_count++; + oldfs = get_fs(); set_fs(KERNEL_DS); + err = inode->i_op->readlink(inode, buf, *lenp); + set_fs(oldfs); + + if (err < 0) + return nfserrno(-err); + *lenp = err; + + return 0; +} + +/* + * Create a symlink and look up its inode + */ +int +nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, + char *path, int plen, + struct svc_fh *resfhp) +{ + struct inode *dirp, *inode; + int err; + + if (!flen || !plen) + return nfserr_noent; + + if ((err = fh_lookup(rqstp, fhp, S_IFDIR, MAY_CREATE)) != 0) + return err; + + dirp = fhp->fh_inode; + if (nfsd_iscovered(dirp)) + return nfserr_perm; + if (!dirp->i_op || !dirp->i_op->symlink) + return nfserr_perm; + + fh_lock(fhp); /* lock inode */ + dirp->i_count++; + err = dirp->i_op->symlink(dirp, fname, flen, path); + fh_unlock(fhp); /* unlock inode */ + + if (err) + return nfserrno(-err); + + if (EX_ISSYNC(fhp->fh_export)) + nfsd_write_inode(dirp); + + /* + * Okay, now look up the inode of the new symlink. 
+ */ + dirp->i_count++; /* lookup eats the dirp inode */ + err = dirp->i_op->lookup(dirp, fname, flen, &inode); + if (err) + return nfserrno(-err); + + fh_compose(resfhp, fhp->fh_export, inode); + return 0; +} + +/* + * Create a hardlink + */ +int +nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, + char *fname, int len, struct svc_fh *tfhp) +{ + struct inode *dirp, *dest; + int err; + + if ((err = fh_lookup(rqstp, ffhp, S_IFDIR, MAY_CREATE) != 0) || + (err = fh_lookup(rqstp, tfhp, S_IFREG, MAY_NOP)) != 0) + return err; + dirp = ffhp->fh_inode; + dest = tfhp->fh_inode; + + if (!len) + return nfserr_perm; + if (nfsd_iscovered(dirp)) + return nfserr_acces; + if (dirp->i_dev != dest->i_dev) + return nfserr_acces; /* FIXME: nxdev for NFSv3 */ + if (IS_IMMUTABLE(dest) /* || IS_APPEND(dest) */ ) + return nfserr_perm; + if (!dirp->i_op || !dirp->i_op->link) + return nfserr_perm; + + fh_lock(ffhp); /* lock directory inode */ + dirp->i_count++; + err = dirp->i_op->link(dest, dirp, fname, len); + fh_unlock(ffhp); /* unlock inode */ + + if (!err && EX_ISSYNC(ffhp->fh_export)) { + nfsd_write_inode(dirp); + nfsd_write_inode(dest); + } + + return err? nfserrno(-err) : 0; +} + +/* + * Rename a file + */ +int +nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, + struct svc_fh *tfhp, char *tname, int tlen) +{ + struct inode *fdir, *tdir; + int err; + + if ((err = fh_lookup(rqstp, ffhp, S_IFDIR, MAY_REMOVE) != 0) + || (err = fh_lookup(rqstp, tfhp, S_IFDIR, MAY_CREATE)) != 0) + return err; + fdir = ffhp->fh_inode; + tdir = tfhp->fh_inode; + + if (!flen || (fname[0] == '.' && + (flen == 1 || (flen == 2 && fname[1] == '.'))) || + !tlen || (tname[0] == '.' 
&& + (tlen == 1 || (tlen == 2 && tname[1] == '.')))) + return nfserr_perm; + + if (fdir->i_dev != tdir->i_dev) + return nfserr_acces; /* nfserr_nxdev */ + if (!fdir->i_op || !fdir->i_op->rename) + return nfserr_perm; + + fh_lock(tfhp); /* lock destination directory */ + tdir->i_count++; + fdir->i_count++; + err = fdir->i_op->rename(fdir, fname, flen, tdir, tname, tlen, 0); + fh_unlock(tfhp); /* unlock inode */ + + if (!err && EX_ISSYNC(tfhp->fh_export)) { + nfsd_write_inode(fdir); + nfsd_write_inode(tdir); + } + + return err? nfserrno(-err) : 0; +} + +/* + * Unlink a file or directory + */ +int +nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, + char *fname, int flen) +{ + struct inode *dirp; + int err; + + if (!flen || isdotent(fname, flen)) + return nfserr_acces; + + if ((err = fh_lookup(rqstp, fhp, S_IFDIR, MAY_REMOVE)) != 0) + return err; + + fh_lock(fhp); /* lock inode */ + dirp = fhp->fh_inode; + + if (type == S_IFDIR) { + if (!dirp->i_op || !dirp->i_op->rmdir) + return nfserr_notdir; + dirp->i_count++; + err = dirp->i_op->rmdir(dirp, fname, flen); + } else { /* other than S_IFDIR */ + if (!dirp->i_op || !dirp->i_op->unlink) + return nfserr_perm; + dirp->i_count++; + err = dirp->i_op->unlink(dirp, fname, flen); + } + + fh_unlock(fhp); /* unlock inode */ + if (!err && EX_ISSYNC(fhp->fh_export)) + nfsd_write_inode(dirp); + + return err? nfserrno(-err) : 0; +} + +/* + * Read entries from a directory. 
+ */ +int +nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, + encode_dent_fn func, u32 *buffer, int *countp) +{ + struct readdir_cd cd; + struct file file; + u32 *p; + int oldlen, eof, err; + + if (offset > ~(u32) 0) + return 0; + + if ((err = nfsd_open(rqstp, fhp, S_IFDIR, OPEN_READ, &file)) != 0) + return err; + + if (!file.f_op->readdir) { + nfsd_close(&file); + return nfserr_notdir; + } + file.f_pos = offset; + + /* Set up the readdir context */ + memset(&cd, 0, sizeof(cd)); + cd.rqstp = rqstp; + cd.buffer = buffer; + cd.buflen = *countp >> 2; + + /* + * Read the directory entries. This silly loop is necessary because + * readdir() is not guaranteed to fill up the entire buffer, but + * may choose to do less. + */ + do { + oldlen = cd.buflen; + + /* + dprintk("nfsd: f_op->readdir(%x/%ld @ %d) buflen = %d (%d)\n", + file.f_inode->i_dev, file.f_inode->i_ino, + (int) file.f_pos, (int) oldlen, (int) cd.buflen); + */ + err = file.f_op->readdir(file.f_inode, &file, + &cd, (filldir_t) func); + + if (err < 0) { + nfsd_close(&file); + return nfserrno(-err); + } + if (oldlen == cd.buflen) + break; + } while (oldlen != cd.buflen && !cd.eob); + + /* If we didn't fill the buffer completely, we're at EOF */ + eof = !cd.eob; + + /* Hewlett Packard ignores the eof flag on READDIR. Some + * fs-specific readdir implementations seem to reset f_pos to 0 + * at EOF however, causing an endless loop. */ + if (cd.offset && !eof) + *cd.offset = htonl(file.f_pos); + + /* Close the file */ + nfsd_close(&file); + + p = cd.buffer; + *p++ = 0; /* no more entries */ + *p++ = htonl(eof); /* end of directory */ + *countp = (caddr_t) p - (caddr_t) buffer; + + dprintk("nfsd: readdir result %d bytes, eof %d offset %ld\n", + *countp, eof, + cd.offset? 
ntohl(*cd.offset) : -1); + return 0; +} + +/* + * Get file system stats + */ +int +nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct statfs *stat) +{ + struct inode *inode; + struct super_block *sb; + unsigned long oldfs; + int err; + + if ((err = fh_lookup(rqstp, fhp, 0, MAY_NOP)) != 0) + return err; + inode = fhp->fh_inode; + + if (!(sb = inode->i_sb) || !sb->s_op->statfs) + return nfserr_io; + + oldfs = get_fs(); + set_fs (KERNEL_DS); + sb->s_op->statfs(sb, stat, sizeof(*stat)); + set_fs (oldfs); + + return 0; +} + +/* + * Check for a user's access permissions to this inode. + */ +int +nfsd_permission(struct svc_export *exp, struct inode *inode, int acc) +{ + int err; + + if (acc == MAY_NOP) + return 0; + + /* + dprintk("nfsd: permission 0x%x%s%s%s%s%s mode 0%o%s%s%s\n", + acc, + (acc & MAY_READ)? " read" : "", + (acc & MAY_WRITE)? " write" : "", + (acc & MAY_EXEC)? " exec" : "", + (acc & MAY_SATTR)? " sattr" : "", + (acc & MAY_TRUNC)? " trunc" : "", + inode->i_mode, + IS_IMMUTABLE(inode)? " immut" : "", + IS_APPEND(inode)? " append" : "", + IS_RDONLY(inode)? " ro" : ""); + dprintk(" owner %d/%d user %d/%d\n", + inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); + */ + + if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { + if (EX_RDONLY(exp) || IS_RDONLY(inode)) + return nfserr_rofs; + if (S_ISDIR(inode->i_mode) && nfsd_iscovered(inode)) + return nfserr_perm; + if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) + return nfserr_perm; + } + if ((acc & MAY_TRUNC) && IS_APPEND(inode)) + return nfserr_perm; + + /* + * The file owner always gets access permission. This is to make + * file access work even when the client has done a fchmod(fd, 0). + * + * However, `cp foo bar' should fail nevertheless when bar is + * readonly. A sensible way to do this might be to reject all + * attempts to truncate a read-only file, because a creat() call + * always implies file truncation. 
+ */ + if (inode->i_uid == current->fsuid /* && !(acc & MAY_TRUNC) */) + return 0; + + err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); + + /* Allow read access to binaries even when mode 111 */ + if (err == -EPERM && S_ISREG(inode->i_mode) && acc == MAY_READ) + err = permission(inode, MAY_EXEC); + + return err? nfserrno(-err) : 0; +} + +/* + * Look up the inode for a given FH. + */ +struct inode * +nfsd_iget(dev_t dev, ino_t ino) +{ + struct super_block *sb; + + if (!(sb = get_super(dev)) || fs_off_limits(sb)) + return NULL; + return __iget(sb, ino, 0); +} + +/* + * Write the inode if dirty (copy of fs/inode.c:write_inode) + */ +void +nfsd_write_inode(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + + if (!inode->i_dirt) + return; + while (inode->i_lock) { + sleep_on(&inode->i_wait); + if (!inode->i_dirt) + return; + } + if (!sb || !sb->s_op || !sb->s_op->write_inode) { + inode->i_dirt = 0; + return; + } + inode->i_lock = 1; + sb->s_op->write_inode(inode); + inode->i_lock = 0; + wake_up(&inode->i_wait); +} + +/* + * Look up the root inode of the parent fs. + * We have to go through iget in order to allow for wait_on_inode. + */ +int +nfsd_parentdev(dev_t* devp) +{ + struct super_block *sb; + + if (!(sb = get_super(*devp)) || !sb->s_covered) + return 0; + if (*devp == sb->s_covered->i_dev) + return 0; + *devp = sb->s_covered->i_dev; + return 1; +} + +/* Duplicated here from fs/super.c because it's not exported */ +static struct super_block * +get_super(dev_t dev) +{ + struct super_block *s; + + if (!dev) + return NULL; + s = 0 + super_blocks; + while (s < NR_SUPER + super_blocks) + if (s->s_dev == dev) { + wait_on_super(s); + if (s->s_dev == dev) + return s; + s = 0 + super_blocks; + } else + s++; + return NULL; +} + +/* + * This is a copy from fs/inode.c because it wasn't exported. 
+ */ +int +nfsd_notify_change(struct inode *inode, struct iattr *attr) +{ + int retval; + + if (inode->i_sb && inode->i_sb->s_op && + inode->i_sb->s_op->notify_change) + return inode->i_sb->s_op->notify_change(inode, attr); + + if ((retval = inode_change_ok(inode, attr)) != 0) + return retval; + + inode_setattr(inode, attr); + return 0; +} + +/* + * Initialize readahead param cache + */ +void +nfsd_racache_init(void) +{ + int i; + + if (raparm_cache) + return; + memset(raparms, 0, sizeof(raparms)); + for (i = 0; i < FILECACHE_MAX - 1; i++) { + raparms[i].p_next = raparms + i + 1; + } + raparm_cache = raparms; +} @@ -18,6 +18,8 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/file.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> #include <asm/bitops.h> @@ -27,19 +29,24 @@ asmlinkage int sys_statfs(const char * path, struct statfs * buf) struct inode * inode; int error; + lock_kernel(); error = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs)); if (error) - return error; + goto out; error = namei(path,&inode); if (error) - return error; + goto out; + error = -ENOSYS; if (!inode->i_sb->s_op->statfs) { iput(inode); - return -ENOSYS; + goto out; } inode->i_sb->s_op->statfs(inode->i_sb, buf, sizeof(struct statfs)); iput(inode); - return 0; + error = 0; +out: + unlock_kernel(); + return error; } asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf) @@ -48,19 +55,23 @@ asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf) struct file * file; int error; + lock_kernel(); error = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs)); if (error) - return error; + goto out; if (fd >= NR_OPEN || !(file = current->files->fd[fd])) - return -EBADF; - if (!(inode = file->f_inode)) - return -ENOENT; - if (!inode->i_sb) - return -ENODEV; - if (!inode->i_sb->s_op->statfs) - return -ENOSYS; - inode->i_sb->s_op->statfs(inode->i_sb, buf, sizeof(struct statfs)); - return 0; + error = -EBADF; + else if (!(inode = 
file->f_inode)) + error = -ENOENT; + else if (!inode->i_sb) + error = -ENODEV; + else if (!inode->i_sb->s_op->statfs) + error = -ENOSYS; + else + inode->i_sb->s_op->statfs(inode->i_sb, buf, sizeof(struct statfs)); +out: + unlock_kernel(); + return error; } int do_truncate(struct inode *inode, unsigned long length) @@ -87,29 +98,30 @@ asmlinkage int sys_truncate(const char * path, unsigned long length) struct inode * inode; int error; + lock_kernel(); error = namei(path,&inode); if (error) - return error; + goto out; error = -EACCES; if (S_ISDIR(inode->i_mode)) - goto out; + goto iput_and_out; error = permission(inode,MAY_WRITE); if (error) - goto out; + goto iput_and_out; error = -EROFS; if (IS_RDONLY(inode)) - goto out; + goto iput_and_out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; + goto iput_and_out; error = get_write_access(inode); if (error) - goto out; + goto iput_and_out; error = locks_verify_area(FLOCK_VERIFY_WRITE, inode, NULL, length < inode->i_size ? length : inode->i_size, @@ -120,8 +132,10 @@ asmlinkage int sys_truncate(const char * path, unsigned long length) error = do_truncate(inode, length); } put_write_access(inode); -out: +iput_and_out: iput(inode); +out: + unlock_kernel(); return error; } @@ -131,19 +145,23 @@ asmlinkage int sys_ftruncate(unsigned int fd, unsigned long length) struct file * file; int error; + lock_kernel(); if (fd >= NR_OPEN || !(file = current->files->fd[fd])) - return -EBADF; - if (!(inode = file->f_inode)) - return -ENOENT; - if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) - return -EACCES; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - error = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, - length < inode->i_size ? 
length : inode->i_size, - abs(inode->i_size - length)); - if (!error) - error = do_truncate(inode, length); + error = -EBADF; + else if (!(inode = file->f_inode)) + error = -ENOENT; + else if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) + error = -EACCES; + else if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + error = -EPERM; + else { + error = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, + length<inode->i_size ? length : inode->i_size, + abs(inode->i_size - length)); + if (!error) + error = do_truncate(inode, length); + } + unlock_kernel(); return error; } @@ -166,33 +184,37 @@ asmlinkage int sys_utime(char * filename, struct utimbuf * times) struct inode * inode; struct iattr newattrs; + lock_kernel(); error = namei(filename,&inode); if (error) - return error; + goto out; + error = -EROFS; if (IS_RDONLY(inode)) { iput(inode); - return -EROFS; + goto out; } /* Don't worry, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { - error = verify_area(VERIFY_READ, times, sizeof(*times)); + error = get_user(newattrs.ia_atime, &times->actime); + if (!error) + error = get_user(newattrs.ia_mtime, &times->modtime); if (error) { iput(inode); - return error; + goto out; } - get_user(newattrs.ia_atime, &times->actime); - get_user(newattrs.ia_mtime, &times->modtime); newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; } else { if (current->fsuid != inode->i_uid && (error = permission(inode,MAY_WRITE)) != 0) { iput(inode); - return error; + goto out; } } error = notify_change(inode, &newattrs); iput(inode); +out: + unlock_kernel(); return error; } @@ -208,34 +230,32 @@ asmlinkage int sys_utimes(char * filename, struct timeval * utimes) struct inode * inode; struct iattr newattrs; + lock_kernel(); error = namei(filename,&inode); if (error) - return error; - if (IS_RDONLY(inode)) { - iput(inode); - return -EROFS; - } + goto out; + error = -EROFS; + if (IS_RDONLY(inode)) + goto iput_and_out; /* Don't worry, the checks are 
done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (utimes) { struct timeval times[2]; - error = verify_area(VERIFY_READ, utimes, sizeof(times)); - if (error) { - iput(inode); - return error; - } - copy_from_user(&times, utimes, sizeof(times)); + error = -EFAULT; + if (copy_from_user(&times, utimes, sizeof(times))) + goto iput_and_out; newattrs.ia_atime = times[0].tv_sec; newattrs.ia_mtime = times[1].tv_sec; newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; } else { - if ((error = permission(inode,MAY_WRITE)) != 0) { - iput(inode); - return error; - } + if ((error = permission(inode,MAY_WRITE)) != 0) + goto iput_and_out; } error = notify_change(inode, &newattrs); +iput_and_out: iput(inode); +out: + unlock_kernel(); return error; } @@ -247,10 +267,11 @@ asmlinkage int sys_access(const char * filename, int mode) { struct inode * inode; int old_fsuid, old_fsgid; - int res; + int res = -EINVAL; + lock_kernel(); if (mode != (mode & S_IRWXO)) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ - return -EINVAL; + goto out; old_fsuid = current->fsuid; old_fsgid = current->fsgid; current->fsuid = current->uid; @@ -262,6 +283,8 @@ asmlinkage int sys_access(const char * filename, int mode) } current->fsuid = old_fsuid; current->fsgid = old_fsgid; +out: + unlock_kernel(); return res; } @@ -270,40 +293,51 @@ asmlinkage int sys_chdir(const char * filename) struct inode * inode; int error; + lock_kernel(); error = namei(filename,&inode); if (error) - return error; + goto out; + error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) { iput(inode); - return -ENOTDIR; + goto out; } if ((error = permission(inode,MAY_EXEC)) != 0) { iput(inode); - return error; + goto out; } iput(current->fs->pwd); current->fs->pwd = inode; - return (0); + error = 0; +out: + unlock_kernel(); + return error; } asmlinkage int sys_fchdir(unsigned int fd) { struct inode * inode; struct file * file; - int error; + int error = -EBADF; + lock_kernel(); if (fd >= NR_OPEN || !(file = current->files->fd[fd])) - return -EBADF; + goto out; + error = -ENOENT; if (!(inode = file->f_inode)) - return -ENOENT; + goto out; + error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) - return -ENOTDIR; + goto out; if ((error = permission(inode,MAY_EXEC)) != 0) - return error; + goto out; iput(current->fs->pwd); current->fs->pwd = inode; inode->i_count++; - return (0); + error = 0; +out: + unlock_kernel(); + return error; } asmlinkage int sys_chroot(const char * filename) @@ -311,20 +345,26 @@ asmlinkage int sys_chroot(const char * filename) struct inode * inode; int error; + lock_kernel(); error = namei(filename,&inode); if (error) - return error; + goto out; + error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) { iput(inode); - return -ENOTDIR; + goto out; } + error = -EPERM; if (!fsuser()) { iput(inode); - return -EPERM; + goto out; } iput(current->fs->root); current->fs->root = inode; - return (0); + error = 0; +out: + unlock_kernel(); + return error; } asmlinkage int sys_fchmod(unsigned int fd, mode_t mode) @@ -332,21 
+372,29 @@ asmlinkage int sys_fchmod(unsigned int fd, mode_t mode) struct inode * inode; struct file * file; struct iattr newattrs; + int err = -EBADF; + lock_kernel(); if (fd >= NR_OPEN || !(file = current->files->fd[fd])) - return -EBADF; + goto out; + err = -ENOENT; if (!(inode = file->f_inode)) - return -ENOENT; + goto out; + err = -EROFS; if (IS_RDONLY(inode)) - return -EROFS; + goto out; + err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; + goto out; if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; inode->i_dirt = 1; - return notify_change(inode, &newattrs); + err = notify_change(inode, &newattrs); +out: + unlock_kernel(); + return err; } asmlinkage int sys_chmod(const char * filename, mode_t mode) @@ -355,24 +403,26 @@ asmlinkage int sys_chmod(const char * filename, mode_t mode) int error; struct iattr newattrs; + lock_kernel(); error = namei(filename,&inode); if (error) - return error; - if (IS_RDONLY(inode)) { - iput(inode); - return -EROFS; - } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { - iput(inode); - return -EPERM; - } + goto out; + error = -EROFS; + if (IS_RDONLY(inode)) + goto iput_and_out; + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto iput_and_out; if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; inode->i_dirt = 1; error = notify_change(inode, &newattrs); +iput_and_out: iput(inode); +out: + unlock_kernel(); return error; } @@ -381,16 +431,20 @@ asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group) struct inode * inode; struct file * file; struct iattr newattrs; - int error; + int error = -EBADF; + lock_kernel(); if (fd >= NR_OPEN || !(file = current->files->fd[fd])) - return -EBADF; + goto out; + error = -ENOENT; if (!(inode = file->f_inode)) - return 
-ENOENT; + goto out; + error = -EROFS; if (IS_RDONLY(inode)) - return -EROFS; + goto out; + error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; + goto out; if (user == (uid_t) -1) user = inode->i_uid; if (group == (gid_t) -1) @@ -419,13 +473,16 @@ asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group) inode->i_dirt = 1; if (inode->i_sb && inode->i_sb->dq_op) { inode->i_sb->dq_op->initialize(inode, -1); + error = -EDQUOT; if (inode->i_sb->dq_op->transfer(inode, &newattrs, 0)) - return -EDQUOT; + goto out; error = notify_change(inode, &newattrs); if (error) inode->i_sb->dq_op->transfer(inode, &newattrs, 1); } else error = notify_change(inode, &newattrs); +out: + unlock_kernel(); return error; } @@ -435,17 +492,16 @@ asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) int error; struct iattr newattrs; + lock_kernel(); error = lnamei(filename,&inode); if (error) - return error; - if (IS_RDONLY(inode)) { - iput(inode); - return -EROFS; - } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { - iput(inode); - return -EPERM; - } + goto out; + error = -EROFS; + if (IS_RDONLY(inode)) + goto iput_and_out; + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto iput_and_out; if (user == (uid_t) -1) user = inode->i_uid; if (group == (gid_t) -1) @@ -474,14 +530,18 @@ asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) inode->i_dirt = 1; if (inode->i_sb->dq_op) { inode->i_sb->dq_op->initialize(inode, -1); + error = -EDQUOT; if (inode->i_sb->dq_op->transfer(inode, &newattrs, 0)) - return -EDQUOT; + goto out; error = notify_change(inode, &newattrs); if (error) inode->i_sb->dq_op->transfer(inode, &newattrs, 1); } else error = notify_change(inode, &newattrs); +iput_and_out: iput(inode); +out: + unlock_kernel(); return(error); } @@ -576,17 +636,24 @@ asmlinkage int sys_open(const char * filename,int flags,int mode) char * tmp; int fd, error; + lock_kernel(); fd = get_unused_fd(); - if (fd < 
0) - return fd; + if (fd < 0) { + error = fd; + goto out; + } error = getname(filename, &tmp); if (!error) { error = do_open(tmp,flags,mode, fd); putname(tmp); - if (!error) - return fd; + if (!error) { + error = fd; + goto out; + } } put_unused_fd(fd); +out: + unlock_kernel(); return error; } @@ -598,19 +665,27 @@ asmlinkage int sys_open(const char * filename,int flags,int mode) */ asmlinkage int sys_creat(const char * pathname, int mode) { - return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); + int ret; + + lock_kernel(); + ret = sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); + unlock_kernel(); + return ret; } #endif -void __fput(struct file *filp, struct inode *inode) +int __fput(struct file *filp, struct inode *inode) { + int error = 0; + if (filp->f_op && filp->f_op->release) - filp->f_op->release(inode,filp); + error = filp->f_op->release(inode,filp); filp->f_inode = NULL; if (filp->f_mode & FMODE_WRITE) put_write_access(inode); iput(inode); + return error; } int close_fp(struct file *filp) @@ -624,8 +699,7 @@ int close_fp(struct file *filp) inode = filp->f_inode; if (inode) locks_remove_locks(current, filp); - fput(filp, inode); - return 0; + return fput(filp, inode); } asmlinkage int sys_close(unsigned int fd) @@ -634,6 +708,7 @@ asmlinkage int sys_close(unsigned int fd) struct file * filp; struct files_struct * files; + lock_kernel(); files = current->files; error = -EBADF; if (fd < NR_OPEN && (filp = files->fd[fd]) != NULL) { @@ -642,6 +717,7 @@ asmlinkage int sys_close(unsigned int fd) files->fd[fd] = NULL; error = close_fp(filp); } + unlock_kernel(); return error; } @@ -651,10 +727,16 @@ asmlinkage int sys_close(unsigned int fd) */ asmlinkage int sys_vhangup(void) { + int ret = -EPERM; + + lock_kernel(); if (!suser()) - return -EPERM; + goto out; /* If there is a controlling tty, hang it up */ if (current->tty) tty_vhangup(current->tty); - return 0; + ret = 0; +out: + unlock_kernel(); + return ret; } @@ -12,13 +12,18 @@ #include 
<linux/termios.h> #include <linux/mm.h> +#include <asm/poll.h> #include <asm/uaccess.h> /* * Define this if you want SunOS compatibility wrt braindead * select behaviour on FIFO's. */ +#ifdef __sparc__ +#define FIFO_SUNOS_BRAINDAMAGE +#else #undef FIFO_SUNOS_BRAINDAMAGE +#endif /* We don't use the head/tail construction any more. Now we use the start/len*/ /* construction providing full use of PIPE_BUF (multiple of PAGE_SIZE) */ @@ -147,70 +152,51 @@ static long bad_pipe_w(struct inode * inode, struct file * filp, static int pipe_ioctl(struct inode *pino, struct file * filp, unsigned int cmd, unsigned long arg) { - int error; - switch (cmd) { case FIONREAD: - error = verify_area(VERIFY_WRITE, (void *) arg, sizeof(int)); - if (!error) - put_user(PIPE_SIZE(*pino),(int *) arg); - return error; + return put_user(PIPE_SIZE(*pino),(int *) arg); default: return -EINVAL; } } -static int pipe_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait) +static unsigned int pipe_poll(struct file * filp, poll_table * wait) { - switch (sel_type) { - case SEL_IN: - if (!PIPE_EMPTY(*inode) || !PIPE_WRITERS(*inode)) - return 1; - select_wait(&PIPE_WAIT(*inode), wait); - return 0; - case SEL_OUT: - if (PIPE_EMPTY(*inode) || !PIPE_READERS(*inode)) - return 1; - select_wait(&PIPE_WAIT(*inode), wait); - return 0; - case SEL_EX: - if (!PIPE_READERS(*inode) || !PIPE_WRITERS(*inode)) - return 1; - select_wait(&inode->i_wait,wait); - return 0; - } - return 0; + unsigned int mask; + struct inode * inode = filp->f_inode; + + poll_wait(&PIPE_WAIT(*inode), wait); + mask = POLLIN | POLLRDNORM; + if (PIPE_EMPTY(*inode)) + mask = POLLOUT | POLLWRNORM; + if (!PIPE_WRITERS(*inode)) + mask |= POLLHUP; + if (!PIPE_READERS(*inode)) + mask |= POLLERR; + return mask; } #ifdef FIFO_SUNOS_BRAINDAMAGE /* * Arggh. Why does SunOS have to have different select() behaviour - * for pipes and fifos? Hate-Hate-Hate. See difference in SEL_IN.. + * for pipes and fifos? Hate-Hate-Hate. 
SunOS lacks POLLHUP.. */ -static int fifo_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait) +static unsigned int fifo_poll(struct file * filp, poll_table * wait) { - switch (sel_type) { - case SEL_IN: - if (!PIPE_EMPTY(*inode)) - return 1; - select_wait(&PIPE_WAIT(*inode), wait); - return 0; - case SEL_OUT: - if (!PIPE_FULL(*inode) || !PIPE_READERS(*inode)) - return 1; - select_wait(&PIPE_WAIT(*inode), wait); - return 0; - case SEL_EX: - if (!PIPE_READERS(*inode) || !PIPE_WRITERS(*inode)) - return 1; - select_wait(&inode->i_wait,wait); - return 0; - } - return 0; + unsigned int mask; + struct inode * inode = filp->f_inode; + + poll_wait(&PIPE_WAIT(*inode), wait); + mask = POLLIN | POLLRDNORM; + if (PIPE_EMPTY(*inode)) + mask = POLLOUT | POLLWRNORM; + if (!PIPE_READERS(*inode)) + mask |= POLLERR; + return mask; } #else -#define fifo_select pipe_select +#define fifo_poll pipe_poll #endif /* FIFO_SUNOS_BRAINDAMAGE */ @@ -228,52 +214,42 @@ static long connect_read(struct inode * inode, struct file * filp, return pipe_read(inode,filp,buf,count); } -static int connect_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait) +static unsigned int connect_poll(struct file * filp, poll_table * wait) { - switch (sel_type) { - case SEL_IN: - if (!PIPE_EMPTY(*inode)) { - filp->f_op = &read_fifo_fops; - return 1; - } - if (PIPE_WRITERS(*inode)) { - filp->f_op = &read_fifo_fops; - } - select_wait(&PIPE_WAIT(*inode), wait); - return 0; - case SEL_OUT: - if (!PIPE_FULL(*inode)) - return 1; - select_wait(&PIPE_WAIT(*inode), wait); - return 0; - case SEL_EX: - if (!PIPE_READERS(*inode) || !PIPE_WRITERS(*inode)) - return 1; - select_wait(&inode->i_wait,wait); - return 0; + struct inode * inode = filp->f_inode; + + poll_wait(&PIPE_WAIT(*inode), wait); + if (!PIPE_EMPTY(*inode)) { + filp->f_op = &read_fifo_fops; + return POLLIN | POLLRDNORM; } - return 0; + if (PIPE_WRITERS(*inode)) + filp->f_op = &read_fifo_fops; + return 
POLLOUT | POLLWRNORM; } -static void pipe_read_release(struct inode * inode, struct file * filp) +static int pipe_read_release(struct inode * inode, struct file * filp) { PIPE_READERS(*inode)--; wake_up_interruptible(&PIPE_WAIT(*inode)); + return 0; } -static void pipe_write_release(struct inode * inode, struct file * filp) +static int pipe_write_release(struct inode * inode, struct file * filp) { PIPE_WRITERS(*inode)--; wake_up_interruptible(&PIPE_WAIT(*inode)); + return 0; } -static void pipe_rdwr_release(struct inode * inode, struct file * filp) +static int pipe_rdwr_release(struct inode * inode, struct file * filp) { if (filp->f_mode & FMODE_READ) PIPE_READERS(*inode)--; if (filp->f_mode & FMODE_WRITE) PIPE_WRITERS(*inode)--; wake_up_interruptible(&PIPE_WAIT(*inode)); + return 0; } static int pipe_read_open(struct inode * inode, struct file * filp) @@ -306,7 +282,7 @@ struct file_operations connecting_fifo_fops = { connect_read, bad_pipe_w, NULL, /* no readdir */ - connect_select, + connect_poll, pipe_ioctl, NULL, /* no mmap on pipes.. surprise */ pipe_read_open, @@ -319,7 +295,7 @@ struct file_operations read_fifo_fops = { pipe_read, bad_pipe_w, NULL, /* no readdir */ - fifo_select, + fifo_poll, pipe_ioctl, NULL, /* no mmap on pipes.. surprise */ pipe_read_open, @@ -332,7 +308,7 @@ struct file_operations write_fifo_fops = { bad_pipe_r, pipe_write, NULL, /* no readdir */ - fifo_select, + fifo_poll, pipe_ioctl, NULL, /* mmap */ pipe_write_open, @@ -345,7 +321,7 @@ struct file_operations rdwr_fifo_fops = { pipe_read, pipe_write, NULL, /* no readdir */ - fifo_select, + fifo_poll, pipe_ioctl, NULL, /* mmap */ pipe_rdwr_open, @@ -358,7 +334,7 @@ struct file_operations read_pipe_fops = { pipe_read, bad_pipe_w, NULL, /* no readdir */ - pipe_select, + pipe_poll, pipe_ioctl, NULL, /* no mmap on pipes.. 
surprise */ pipe_read_open, @@ -371,7 +347,7 @@ struct file_operations write_pipe_fops = { bad_pipe_r, pipe_write, NULL, /* no readdir */ - pipe_select, + pipe_poll, pipe_ioctl, NULL, /* mmap */ pipe_write_open, @@ -384,7 +360,7 @@ struct file_operations rdwr_pipe_fops = { pipe_read, pipe_write, NULL, /* no readdir */ - pipe_select, + pipe_poll, pipe_ioctl, NULL, /* mmap */ pipe_rdwr_open, diff --git a/fs/proc/Makefile b/fs/proc/Makefile index eb3c80661..75ec3dd85 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,8 +8,17 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := proc.o -O_OBJS := inode.o root.o base.o mem.o link.o fd.o array.o kmsg.o net.o scsi.o +O_OBJS := inode.o root.o base.o generic.o mem.o link.o fd.o array.o \ + kmsg.o scsi.o proc_tty.o OX_OBJS := procfs_syms.o -M_OBJS := $(O_TARGET) +M_OBJS := + +ifeq ($(CONFIG_SUN_OPENPROMFS),y) +O_OBJS += openpromfs.o +else + ifeq ($(CONFIG_SUN_OPENPROMFS),m) + M_OBJS += openpromfs.o + endif +endif include $(TOPDIR)/Rules.make diff --git a/fs/proc/array.c b/fs/proc/array.c index 20cac345a..4671f1a98 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -46,6 +46,8 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -67,7 +69,7 @@ static long read_core(struct inode * inode, struct file * file, int count1; char * pnt; struct user dump; -#ifdef __i386__ +#if defined (__i386__) || defined (__mc68000__) # define FIRST_MAPPED PAGE_SIZE /* we don't have page 0 mapped on x86.. */ #else # define FIRST_MAPPED 0 @@ -121,7 +123,6 @@ struct inode_operations proc_kcore_inode_operations = { &proc_kcore_operations, }; - /* * This function accesses profiling information. 
The returned data is * binary: the sampling step and the actual contents of the profile @@ -142,7 +143,7 @@ static long read_profile(struct inode *inode, struct file *file, count = (prof_len+1)*sizeof(unsigned int) - p; read = 0; - while (p < sizeof(unsigned int) && count > 0) { + while (p < sizeof(unsigned long) && count > 0) { put_user(*((char *)(&sample_step)+p),buf); buf++; p++; count--; read++; } @@ -153,16 +154,38 @@ static long read_profile(struct inode *inode, struct file *file, return read; } -/* Writing to /proc/profile resets the counters */ +#ifdef __SMP__ + +extern int setup_profiling_timer (unsigned int multiplier); + +/* + * Writing to /proc/profile resets the counters + * + * Writing a 'profiling multiplier' value into it also re-sets the profiling + * interrupt frequency, on architectures that support this. + */ static long write_profile(struct inode * inode, struct file * file, const char * buf, unsigned long count) { - int i=prof_len; + int i=prof_len; + + if (count==sizeof(int)) { + unsigned int multiplier; - while (i--) - prof_buffer[i]=0UL; - return count; + if (copy_from_user(&multiplier, buf, sizeof(int))) + return -EFAULT; + + if (setup_profiling_timer(multiplier)) + return -EINVAL; + } + + while (i--) + prof_buffer[i]=0UL; + return count; } +#else +#define write_profile NULL +#endif static struct file_operations proc_profile_operations = { NULL, /* lseek */ @@ -194,7 +217,9 @@ static int get_kstat(char * buffer) int i, len; unsigned sum = 0; extern unsigned long total_forks; + unsigned long ticks; + ticks = jiffies * smp_num_cpus; for (i = 0 ; i < NR_IRQS ; i++) sum += kstat.interrupts[i]; len = sprintf(buffer, @@ -210,7 +235,7 @@ static int get_kstat(char * buffer) kstat.cpu_user, kstat.cpu_nice, kstat.cpu_system, - jiffies - (kstat.cpu_user + kstat.cpu_nice + kstat.cpu_system), + ticks - (kstat.cpu_user + kstat.cpu_nice + kstat.cpu_system), kstat.dk_drive[0], kstat.dk_drive[1], kstat.dk_drive[2], kstat.dk_drive[3], kstat.dk_drive_rio[0], 
kstat.dk_drive_rio[1], @@ -468,6 +493,30 @@ static unsigned long get_wchan(struct task_struct *p) } return pc; } +#elif defined(__mc68000__) + { + unsigned long fp, pc; + unsigned long stack_page; + int count = 0; + extern int sys_pause (void); + + stack_page = p->kernel_stack_page; + if (!stack_page) + return 0; + fp = ((struct switch_stack *)p->tss.ksp)->a6; + do { + if (fp < stack_page || fp >= 4088+stack_page) + return 0; + pc = ((unsigned long *)fp)[1]; + /* FIXME: This depends on the order of these functions. */ + if ((pc < (unsigned long) __down + || pc >= (unsigned long) add_timer) + && (pc < (unsigned long) schedule + || pc >= (unsigned long) sys_pause)) + return pc; + fp = *(unsigned long *) fp; + } while (count++ < 16); + } #endif return 0; @@ -484,11 +533,21 @@ static unsigned long get_wchan(struct task_struct *p) + (long)&((struct pt_regs *)0)->reg) # define KSTK_EIP(tsk) (*(unsigned long *)(tsk->kernel_stack_page + PT_REG(pc))) # define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->tss.usp) +#elif defined(__mc68000__) +#define KSTK_EIP(tsk) \ + ({ \ + unsigned long eip = 0; \ + if ((tsk)->tss.esp0 > PAGE_SIZE && \ + MAP_NR((tsk)->tss.esp0) < max_mapnr) \ + eip = ((struct pt_regs *) (tsk)->tss.esp0)->pc; \ + eip; }) +#define KSTK_ESP(tsk) ((tsk) == current ? 
rdusp() : (tsk)->tss.usp) +#elif defined (__sparc_v9__) +# define KSTK_EIP(tsk) ((tsk)->tss.kregs->tpc) +# define KSTK_ESP(tsk) ((tsk)->tss.kregs->u_regs[UREG_FP]) #elif defined(__sparc__) -# define PT_REG(reg) (PAGE_SIZE - sizeof(struct pt_regs) \ - + (long)&((struct pt_regs *)0)->reg) -# define KSTK_EIP(tsk) (*(unsigned long *)(tsk->kernel_stack_page + PT_REG(pc))) -# define KSTK_ESP(tsk) (*(unsigned long *)(tsk->kernel_stack_page + PT_REG(u_regs[UREG_FP]))) +# define KSTK_EIP(tsk) ((tsk)->tss.kregs->pc) +# define KSTK_ESP(tsk) ((tsk)->tss.kregs->u_regs[UREG_FP]) #elif defined(__mips__) # define PT_REG(reg) ((long)&((struct pt_regs *)0)->reg \ - sizeof(struct pt_regs)) @@ -784,7 +843,7 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned ++*dirty; if (MAP_NR(pte_page(page)) >= max_mapnr) continue; - if (mem_map[MAP_NR(pte_page(page))].count > 1) + if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) > 1) ++*shared; } while (address < end); } @@ -997,9 +1056,13 @@ extern int get_pci_list(char*); extern int get_md_status (char *); extern int get_rtc_status (char *); extern int get_locks_status (char *); +extern int get_swaparea_info (char *); #ifdef __SMP_PROF__ extern int get_smp_prof_list(char *); #endif +#ifdef CONFIG_ZORRO +extern int zorro_get_list(char *); +#endif static long get_root_array(char * page, int type, char **start, off_t offset, unsigned long length) @@ -1041,6 +1104,9 @@ static long get_root_array(char * page, int type, char **start, case PROC_STAT: return get_kstat(page); + case PROC_SLABINFO: + return get_slabinfo(page); + case PROC_DEVICES: return get_device_list(page); @@ -1068,12 +1134,19 @@ static long get_root_array(char * page, int type, char **start, case PROC_MTAB: return get_filesystem_info( page ); + + case PROC_SWAP: + return get_swaparea_info(page); #ifdef CONFIG_RTC case PROC_RTC: return get_rtc_status(page); #endif case PROC_LOCKS: return get_locks_status(page); +#ifdef CONFIG_ZORRO + case 
PROC_ZORRO: + return zorro_get_list(page); +#endif } return -EBADF; } @@ -1160,7 +1233,7 @@ static struct file_operations proc_array_operations = { array_read, NULL, /* array_write */ NULL, /* array_readdir */ - NULL, /* array_select */ + NULL, /* array_poll */ NULL, /* array_ioctl */ NULL, /* mmap */ NULL, /* no special open code */ @@ -1206,7 +1279,7 @@ static struct file_operations proc_arraylong_operations = { arraylong_read, NULL, /* array_write */ NULL, /* array_readdir */ - NULL, /* array_select */ + NULL, /* array_poll */ NULL, /* array_ioctl */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 2b6eece4a..28cd61f5f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -8,6 +8,7 @@ #include <asm/uaccess.h> +#include <linux/config.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/proc_fs.h> @@ -18,7 +19,7 @@ static struct file_operations proc_base_operations = { NULL, /* read - bad */ NULL, /* write - bad */ proc_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ @@ -146,8 +147,21 @@ static struct proc_dir_entry proc_pid_maps = { 0, &proc_arraylong_inode_operations, NULL, proc_pid_fill_inode, }; + +#if CONFIG_AP1000 +static struct proc_dir_entry proc_pid_ringbuf = { + PROC_PID_RINGBUF, 7, "ringbuf", + S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, + 0, &proc_ringbuf_inode_operations, + NULL, proc_pid_fill_inode, +}; +#endif + void proc_base_init(void) { +#if CONFIG_AP1000 + proc_register(&proc_pid, &proc_pid_ringbuf); +#endif proc_register(&proc_pid, &proc_pid_status); proc_register(&proc_pid, &proc_pid_mem); proc_register(&proc_pid, &proc_pid_cwd); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 8d4201844..e5e4fd9a8 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -21,7 +21,7 @@ static struct file_operations proc_fd_operations = { NULL, /* read - bad */ NULL, /* write - bad */ proc_readfd, /* 
readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ @@ -106,7 +106,13 @@ static int proc_lookupfd(struct inode * dir, const char * name, int len, if (!pid || i >= NR_TASKS) return -ENOENT; - if (fd >= NR_OPEN || !p->files->fd[fd] || !p->files->fd[fd]->f_inode) + /* + * File handle is invalid if it is out of range, if the process + * has no files (Zombie) if the file is closed, or if its inode + * is NULL + */ + + if (fd >= NR_OPEN || !p->files || !p->files->fd[fd] || !p->files->fd[fd]->f_inode) return -ENOENT; ino = (pid << 16) + (PROC_PID_FD_DIR << 8) + fd; diff --git a/fs/proc/generic.c b/fs/proc/generic.c new file mode 100644 index 000000000..6e80e8298 --- /dev/null +++ b/fs/proc/generic.c @@ -0,0 +1,289 @@ +/* + * proc/fs/generic.c --- generic routines for the proc-fs + * + * This file contains generic proc-fs routines for handling + * directories and files. + * + * Copyright (C) 1991, 1992 Linus Torvalds. + * Copyright (C) 1997 Theodore Ts'o + */ + +#include <asm/uaccess.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <asm/bitops.h> + +static long proc_file_read(struct inode * inode, struct file * file, + char * buf, unsigned long nbytes); +static long proc_file_write(struct inode * inode, struct file * file, + const char * buffer, unsigned long count); +static long long proc_file_lseek(struct inode * inode, struct file * file, + long long offset, int orig); + +static struct file_operations proc_file_operations = { + proc_file_lseek, /* lseek */ + proc_file_read, /* read */ + proc_file_write, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + NULL, /* ioctl */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +/* + * proc files can do almost nothing.. 
+ */ +struct inode_operations proc_file_inode_operations = { + &proc_file_operations, /* default proc file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +/* + * compatibility to replace fs/proc/net.c + */ +struct inode_operations proc_net_inode_operations = { + &proc_file_operations, /* default net file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +/* 4K page size but our output routines use some slack for overruns */ +#define PROC_BLOCK_SIZE (3*1024) + +static long proc_file_read(struct inode * inode, struct file * file, + char * buf, unsigned long nbytes) +{ + char *page; + int retval=0; + int eof=0; + int n, count; + char *start; + struct proc_dir_entry * dp; + + if (nbytes < 0) + return -EINVAL; + dp = (struct proc_dir_entry *) inode->u.generic_ip; + if (!(page = (char*) __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + while ((nbytes > 0) && !eof) + { + count = MIN(PROC_BLOCK_SIZE, nbytes); + + start = NULL; + if (dp->get_info) { + /* + * Handle backwards compatibility with the old net + * routines. + * + * XXX What gives with the file->f_flags & O_ACCMODE + * test? Seems stupid to me.... 
+ */ + n = dp->get_info(page, &start, file->f_pos, count, + (file->f_flags & O_ACCMODE) == O_RDWR); + if (n < count) + eof = 1; + } else if (dp->read_proc) { + n = dp->read_proc(page, &start, file->f_pos, + count, &eof, dp->data); + } else + break; + + if (!start) { + /* + * For proc files that are less than 4k + */ + start = page + file->f_pos; + n -= file->f_pos; + if (n <= 0) + break; + if (n > count) + n = count; + } + if (n == 0) + break; /* End of file */ + if (n < 0) { + if (retval == 0) + retval = n; + break; + } + + n -= copy_to_user(buf, start, n); + if (n == 0) { + if (retval == 0) + retval = -EFAULT; + break; + } + + file->f_pos += n; /* Move down the file */ + nbytes -= n; + buf += n; + retval += n; + } + free_page((unsigned long) page); + return retval; +} + +static long +proc_file_write(struct inode * inode, struct file * file, + const char * buffer, unsigned long count) +{ + struct proc_dir_entry * dp; + + if (count < 0) + return -EINVAL; + dp = (struct proc_dir_entry *) inode->u.generic_ip; + + if (!dp->write_proc) + return -EIO; + + return dp->write_proc(file, buffer, count, dp->data); +} + + +static long long proc_file_lseek(struct inode * inode, struct file * file, + long long offset, int orig) +{ + switch (orig) { + case 0: + file->f_pos = offset; + return(file->f_pos); + case 1: + file->f_pos += offset; + return(file->f_pos); + case 2: + return(-EINVAL); + default: + return(-EINVAL); + } +} + +/* + * This function parses a name such as "tty/driver/serial", and + * returns the struct proc_dir_entry for "/proc/tty/driver", and + * returns "serial" in residual. 
+ */ +static int xlate_proc_name(const char *name, + struct proc_dir_entry **ret, const char **residual) +{ + const char *cp = name, *next; + struct proc_dir_entry *de; + int len; + + de = &proc_root; + while (1) { + next = strchr(cp, '/'); + if (!next) + break; + + len = next - cp; + for (de = de->subdir; de ; de = de->next) { + if (proc_match(len, cp, de)) + break; + } + if (!de) + return -ENOENT; + cp += len + 1; + } + *residual = cp; + *ret = de; + return 0; +} + +struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, + struct proc_dir_entry *parent) +{ + struct proc_dir_entry *ent; + const char *fn; + + if (parent) + fn = name; + else { + if (xlate_proc_name(name, &parent, &fn)) + return NULL; + } + + ent = kmalloc(sizeof(struct proc_dir_entry), GFP_KERNEL); + if (!ent) + return NULL; + memset(ent, 0, sizeof(struct proc_dir_entry)); + + if (mode == S_IFDIR) + mode |= S_IRUGO | S_IXUGO; + else if (mode == 0) + mode = S_IFREG | S_IRUGO; + + ent->name = fn; + ent->namelen = strlen(fn); + ent->mode = mode; + if (S_ISDIR(mode)) + ent->nlink = 2; + else + ent->nlink = 1; + + proc_register(parent, ent); + + return ent; +} + +void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry *de; + const char *fn; + int len; + + if (parent) + fn = name; + else + if (xlate_proc_name(name, &parent, &fn)) + return; + len = strlen(fn); + + for (de = parent->subdir; de ; de = de->next) { + if (proc_match(len, fn, de)) + break; + } + if (de) + proc_unregister(parent, de->low_ino); + kfree(de); +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 73eb4a393..0ff6a4f5c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -12,12 +12,19 @@ #include <linux/stat.h> #include <linux/locks.h> #include <linux/limits.h> +#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> static void proc_put_inode(struct inode *inode) { +#ifdef CONFIG_SUN_OPENPROMFS_MODULE + if ((inode->i_ino >= PROC_OPENPROM_FIRST) + && 
(inode->i_ino < PROC_OPENPROM_FIRST + PROC_NOPENPROM) + && proc_openprom_use) + (*proc_openprom_use)(inode, 0); +#endif if (inode->i_nlink) return; inode->i_size = 0; @@ -74,6 +81,14 @@ static int parse_options(char *options,uid_t *uid,gid_t *gid) struct inode * proc_get_inode(struct super_block * s, int ino, struct proc_dir_entry * de) { struct inode * inode = iget(s, ino); + struct task_struct *p; + +#ifdef CONFIG_SUN_OPENPROMFS_MODULE + if ((inode->i_ino >= PROC_OPENPROM_FIRST) + && (inode->i_ino < PROC_OPENPROM_FIRST + PROC_NOPENPROM) + && proc_openprom_use) + (*proc_openprom_use)(inode, 1); +#endif if (inode && inode->i_sb == s) { inode->u.generic_ip = (void *) de; if (de) { @@ -92,13 +107,20 @@ struct inode * proc_get_inode(struct super_block * s, int ino, struct proc_dir_e de->fill_inode(inode); } } + /* + * Fixup the root inode's nlink value + */ + if (inode->i_ino == PROC_ROOT_INO) { + for_each_task(p) + if (p && p->pid) + inode->i_nlink++; + } return inode; } struct super_block *proc_read_super(struct super_block *s,void *data, int silent) { - proc_root_init(); lock_super(s); s->s_blocksize = 1024; s->s_blocksize_bits = 10; @@ -135,104 +157,33 @@ void proc_read_inode(struct inode * inode) struct task_struct * p; int i; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_blocks = 0; + inode->i_blksize = 1024; + ino = inode->i_ino; + if (ino >= PROC_OPENPROM_FIRST && ino < PROC_OPENPROM_FIRST + PROC_NOPENPROM) + return; inode->i_op = NULL; inode->i_mode = 0; inode->i_uid = 0; inode->i_gid = 0; inode->i_nlink = 1; inode->i_size = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_blocks = 0; - inode->i_blksize = 1024; - ino = inode->i_ino; pid = ino >> 16; + if (!pid) + return; p = task[0]; for (i = 0; i < NR_TASKS ; i++) if ((p = task[i]) && (p->pid == pid)) break; if (!p || i >= NR_TASKS) return; - if (ino == PROC_ROOT_INO) { - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - inode->i_nlink = 2; - for 
(i = 1 ; i < NR_TASKS ; i++) - if (task[i]) - inode->i_nlink++; - return; - } - if (!pid) { - switch (ino) { - case PROC_KMSG: - inode->i_mode = S_IFREG | S_IRUSR; - inode->i_op = &proc_kmsg_inode_operations; - break; - case PROC_NET: - inode->i_nlink = 2; - break; - case PROC_SCSI: - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - inode->i_nlink = 2; - inode->i_op = &proc_scsi_inode_operations; - break; - case PROC_KCORE: - inode->i_mode = S_IFREG | S_IRUSR; - inode->i_op = &proc_kcore_inode_operations; - inode->i_size = (MAP_NR(high_memory) << PAGE_SHIFT) + PAGE_SIZE; - break; - case PROC_PROFILE: - inode->i_mode = S_IFREG | S_IRUGO | S_IWUSR; - inode->i_op = &proc_profile_inode_operations; - inode->i_size = (1+prof_len) * sizeof(unsigned long); - break; - default: - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_op = &proc_array_inode_operations; - break; - } - return; - } ino &= 0x0000ffff; if (ino == PROC_PID_INO || p->dumpable) { inode->i_uid = p->euid; inode->i_gid = p->egid; } - switch (ino) { - case PROC_PID_INO: - inode->i_nlink = 4; - return; - case PROC_PID_MEM: - inode->i_op = &proc_mem_inode_operations; - inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; - return; - case PROC_PID_CWD: - case PROC_PID_ROOT: - case PROC_PID_EXE: - inode->i_op = &proc_link_inode_operations; - inode->i_size = 64; - inode->i_mode = S_IFLNK | S_IRWXU; - return; - case PROC_PID_FD: - inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; - inode->i_op = &proc_fd_inode_operations; - inode->i_nlink = 2; - return; - case PROC_PID_ENVIRON: - inode->i_mode = S_IFREG | S_IRUSR; - inode->i_op = &proc_array_inode_operations; - return; - case PROC_PID_CMDLINE: - case PROC_PID_STATUS: - case PROC_PID_STAT: - case PROC_PID_STATM: - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_op = &proc_array_inode_operations; - return; - case PROC_PID_MAPS: - inode->i_mode = S_IFIFO | S_IRUGO; - inode->i_op = &proc_arraylong_inode_operations; - return; - } switch (ino >> 8) { case PROC_PID_FD_DIR: ino &= 0xff; diff 
--git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index add112153..1cc6a9c83 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/poll.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -23,9 +24,10 @@ static int kmsg_open(struct inode * inode, struct file * file) return sys_syslog(1,NULL,0); } -static void kmsg_release(struct inode * inode, struct file * file) +static int kmsg_release(struct inode * inode, struct file * file) { (void) sys_syslog(0,NULL,0); + return 0; } static long kmsg_read(struct inode * inode, struct file * file, @@ -34,13 +36,11 @@ static long kmsg_read(struct inode * inode, struct file * file, return sys_syslog(2,buf,count); } -static int kmsg_select(struct inode *inode, struct file *file, int sel_type, select_table * wait) +static unsigned int kmsg_poll(struct file *file, poll_table * wait) { - if (sel_type != SEL_IN) - return 0; + poll_wait(&log_wait, wait); if (log_size) - return 1; - select_wait(&log_wait, wait); + return POLLIN | POLLRDNORM; return 0; } @@ -50,7 +50,7 @@ static struct file_operations proc_kmsg_operations = { kmsg_read, NULL, /* kmsg_write */ NULL, /* kmsg_readdir */ - kmsg_select, /* kmsg_select */ + kmsg_poll, /* kmsg_poll */ NULL, /* kmsg_ioctl */ NULL, /* mmap */ kmsg_open, diff --git a/fs/proc/link.c b/fs/proc/link.c index cd79cce07..bbaab7c41 100644 --- a/fs/proc/link.c +++ b/fs/proc/link.c @@ -33,7 +33,7 @@ static struct file_operations proc_fd_link_operations = { NULL, /* read - bad */ NULL, /* write - bad */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* very special open code */ diff --git a/fs/proc/mem.c b/fs/proc/mem.c index 0a0ac25e9..ccd67566f 100644 --- a/fs/proc/mem.c +++ b/fs/proc/mem.c @@ -298,7 +298,7 @@ int mem_mmap(struct inode * inode, struct file * file, set_pte(src_table, pte_mkdirty(*src_table)); 
set_pte(dest_table, *src_table); - mem_map[MAP_NR(pte_page(*src_table))].count++; + atomic_inc(&mem_map[MAP_NR(pte_page(*src_table))].count); stmp += PAGE_SIZE; dtmp += PAGE_SIZE; @@ -314,7 +314,7 @@ static struct file_operations proc_mem_operations = { mem_read, mem_write, NULL, /* mem_readdir */ - NULL, /* mem_select */ + NULL, /* mem_poll */ NULL, /* mem_ioctl */ mem_mmap, /* mmap */ NULL, /* no special open code */ diff --git a/fs/proc/net.c b/fs/proc/net.c index 7f11b8696..257487569 100644 --- a/fs/proc/net.c +++ b/fs/proc/net.c @@ -88,7 +88,7 @@ static struct file_operations proc_net_operations = { proc_readnet, /* read - bad */ NULL, /* write - bad */ NULL, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/proc/openpromfs.c b/fs/proc/openpromfs.c new file mode 100644 index 000000000..a9f84b9eb --- /dev/null +++ b/fs/proc/openpromfs.c @@ -0,0 +1,1095 @@ +/* $Id: openpromfs.c,v 1.13 1997/04/03 08:49:25 davem Exp $ + * openpromfs.c: /proc/openprom handling routines + * + * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/init.h> + +#include <asm/openprom.h> +#include <asm/oplib.h> +#include <asm/uaccess.h> + +#define ALIASES_NNODES 64 + +typedef struct { + u16 parent; + u16 next; + u16 child; + u16 first_prop; + u32 node; +} openpromfs_node; + +typedef struct { +#define OPP_STRING 0x10 +#define OPP_BINARY 0x20 +#define OPP_DIRTY 0x01 +#define OPP_QUOTED 0x02 +#define OPP_NOTQUOTED 0x04 +#define OPP_ASCIIZ 0x08 + u32 flag; + u32 alloclen; + u32 len; + char *value; + char name[8]; +} openprom_property; + +static openpromfs_node *nodes = NULL; +static int alloced = 0; +static u16 last_node = 0; +static u16 first_prop = 0; +static u16 options = 0xffff; +static u16 aliases = 0xffff; 
+static int aliases_nodes = 0; +static char *alias_names [ALIASES_NNODES]; +static struct inode_operations *proc_openprom_iops = 0; +static struct openpromfs_dev **devices; + +#define NODE(ino) nodes[ino - PROC_OPENPROM_FIRST] +#define NODE2INO(node) (node + PROC_OPENPROM_FIRST) +#define NODEP2INO(no) (no + PROC_OPENPROM_FIRST + last_node) + +static int openpromfs_create (struct inode *, const char *, int, int, + struct inode **); +static int openpromfs_readdir(struct inode *, struct file *, void *, filldir_t); +static int openpromfs_lookup(struct inode *, const char *, int, + struct inode **); +static int openpromfs_unlink (struct inode *, const char *, int); + +static long nodenum_read(struct inode *inode, struct file *file, + char *buf, unsigned long count) +{ + char buffer[10]; + + if (count < 0 || !inode->u.generic_ip) + return -EINVAL; + sprintf (buffer, "%8.8x\n", (u32)(inode->u.generic_ip)); + if (file->f_pos >= 9) + return 0; + if (count > 9 - file->f_pos) + count = 9 - file->f_pos; + copy_to_user(buf, buffer + file->f_pos, count); + file->f_pos += count; + return count; +} + +static long property_read(struct inode *inode, struct file *filp, + char *buf, unsigned long count) +{ + int i, j, k; + u32 node; + char *p; + u32 *q; + openprom_property *op; + + if (filp->f_pos >= 0xffffff) + return -EINVAL; + if (!filp->private_data) { + node = nodes[(u16)((uint)inode->u.generic_ip)].node; + i = ((u32)inode->u.generic_ip) >> 16; + if ((u16)((uint)inode->u.generic_ip) == aliases) { + if (i >= aliases_nodes) + p = 0; + else + p = alias_names [i]; + } else + for (p = prom_firstprop (node); + i && p && *p; + p = prom_nextprop (node, p), i--) + /* nothing */ ; + if (!p || !*p) + return -EIO; + i = prom_getproplen (node, p); + if (i < 0) { + if ((u16)((uint)inode->u.generic_ip) == aliases) + i = 0; + else + return -EIO; + } + k = i; + if (i < 64) i = 64; + filp->private_data = kmalloc (sizeof (openprom_property) + + (j = strlen (p)) + 2 * i, + GFP_KERNEL); + if 
(!filp->private_data) + return -ENOMEM; + op = (openprom_property *)filp->private_data; + op->flag = 0; + op->alloclen = 2 * i; + strcpy (op->name, p); + op->value = (char *)(((unsigned long)(op->name + j + 4)) & ~3); + op->len = k; + if (k && prom_getproperty (node, p, op->value, i) < 0) + return -EIO; + op->value [k] = 0; + if (k) { + for (p = op->value; *p >= ' ' && *p <= '~'; p++); + if (p >= op->value + k - 1 && !*p) { + op->flag |= OPP_STRING; + if (p == op->value + k - 1) { + op->flag |= OPP_ASCIIZ; + op->len--; + } + } else if (!(k & 3)) + op->flag |= OPP_BINARY; + else { + printk ("/proc/openprom: Strange property " + "size %d\n", i); + return -EIO; + } + } + } else + op = (openprom_property *)filp->private_data; + if (!count || !op->len) return 0; + if (op->flag & OPP_STRING) + i = op->len + 3; + else + i = (op->len * 9)>>2; + k = filp->f_pos; + if (k >= i) return 0; + if (count > i - k) count = i - k; + if (op->flag & OPP_STRING) { + if (!k) { + *buf = '\''; + k++; + count--; + } + if (k + count >= i - 2) + j = i - 2 - k; + else + j = count; + if (j >= 0) { + copy_to_user(buf + k - filp->f_pos, + op->value + k - 1, j); + count -= j; + k += j; + } + if (count) + buf [k++ - filp->f_pos] = '\''; + if (count > 1) + buf [k++ - filp->f_pos] = '\n'; + } else if (op->flag & OPP_BINARY) { + char buffer[10]; + u32 *first, *last; + int first_off, last_cnt; + + first = ((u32 *)op->value) + k / 9; + first_off = k % 9; + last = ((u32 *)op->value) + (k + count - 1) / 9; + last_cnt = (k + count) % 9; + if (!last_cnt) last_cnt = 9; + + if (first == last) { + sprintf (buffer, "%08x.", *first); + memcpy (buf, buffer + first_off, last_cnt - first_off); + buf += last_cnt - first_off; + } else { + for (q = first; q <= last; q++) { + sprintf (buffer, "%08x.", *q); + if (q == first) { + memcpy (buf, buffer + first_off, + 9 - first_off); + buf += 9 - first_off; + } else if (q == last) { + memcpy (buf, buffer, last_cnt); + buf += last_cnt; + } else { + memcpy (buf, buffer, 9); + 
buf += 9; + } + } + } + if (last == (u32 *)(op->value + op->len - 4) && last_cnt == 9) + *(buf - 1) = '\n'; + k += count; + } + count = k - filp->f_pos; + filp->f_pos = k; + return count; +} + +static long property_write(struct inode *inode, struct file *filp, + const char *buf, unsigned long count) +{ + int i, j, k; + char *p; + u32 *q; + void *b; + openprom_property *op; + + if (filp->f_pos >= 0xffffff) + return -EINVAL; + if (!filp->private_data) { + i = property_read (inode, filp, NULL, 0); + if (i) + return i; + } + k = filp->f_pos; + op = (openprom_property *)filp->private_data; + if (!(op->flag & OPP_STRING)) { + u32 *first, *last; + int first_off, last_cnt; + u32 mask, mask2; + char tmp [9]; + int forcelen = 0; + + j = k % 9; + for (i = 0; i < count; i++, j++) { + if (j == 9) j = 0; + if (!j) { + if (buf [i] != '.') { + if (buf [i] != '\n') { + if (op->flag & OPP_BINARY) + return -EINVAL; + else + goto write_try_string; + } else { + count = i + 1; + forcelen = 1; + break; + } + } + } else { + if (buf [i] < '0' || + (buf [i] > '9' && buf [i] < 'A') || + (buf [i] > 'F' && buf [i] < 'a') || + buf [i] > 'f') { + if (op->flag & OPP_BINARY) + return -EINVAL; + else + goto write_try_string; + } + } + } + op->flag |= OPP_BINARY; + tmp [8] = 0; + i = ((count + k + 8) / 9) << 2; + if (op->alloclen <= i) { + b = kmalloc (sizeof (openprom_property) + 2 * i, + GFP_KERNEL); + if (!b) + return -ENOMEM; + memcpy (b, filp->private_data, + sizeof (openprom_property) + + strlen (op->name) + op->alloclen); + memset (((char *)b) + sizeof (openprom_property) + + strlen (op->name) + op->alloclen, + 0, 2 * i - op->alloclen); + op = (openprom_property *)b; + op->alloclen = 2*i; + b = filp->private_data; + filp->private_data = (void *)op; + kfree (b); + } + first = ((u32 *)op->value) + (k / 9); + first_off = k % 9; + last = (u32 *)(op->value + i); + last_cnt = (k + count) % 9; + if (first + 1 == last) { + memset (tmp, '0', 8); + memcpy (tmp + first_off, buf, (count + first_off > 8) 
? + 8 - first_off : count); + mask = 0xffffffff; + mask2 = 0xffffffff; + for (j = 0; j < first_off; j++) + mask >>= 1; + for (j = 8 - count - first_off; j > 0; j--) + mask2 <<= 1; + mask &= mask2; + if (mask) { + *first &= ~mask; + *first |= simple_strtoul (tmp, 0, 16); + op->flag |= OPP_DIRTY; + } + } else { + op->flag |= OPP_DIRTY; + for (q = first; q < last; q++) { + if (q == first) { + if (first_off < 8) { + memset (tmp, '0', 8); + memcpy (tmp + first_off, buf, + 8 - first_off); + mask = 0xffffffff; + for (j = 0; j < first_off; j++) + mask >>= 1; + *q &= ~mask; + *q |= simple_strtoul (tmp,0,16); + } + buf += 9; + } else if ((q == last - 1) && last_cnt + && (last_cnt < 8)) { + memset (tmp, '0', 8); + memcpy (tmp, buf, last_cnt); + mask = 0xffffffff; + for (j = 0; j < 8 - last_cnt; j++) + mask <<= 1; + *q &= ~mask; + *q |= simple_strtoul (tmp, 0, 16); + buf += last_cnt; + } else { + *q = simple_strtoul (buf, 0, 16); + buf += 9; + } + } + } + if (!forcelen) { + if (op->len < i) + op->len = i; + } else + op->len = i; + filp->f_pos += count; + } +write_try_string: + if (!(op->flag & OPP_BINARY)) { + if (!(op->flag & (OPP_QUOTED | OPP_NOTQUOTED))) { + /* No way, if somebody starts writing from the middle, + * we don't know whether he uses quotes around or not + */ + if (k > 0) + return -EINVAL; + if (*buf == '\'') { + op->flag |= OPP_QUOTED; + buf++; + count--; + filp->f_pos++; + if (!count) { + op->flag |= OPP_STRING; + return 1; + } + } else + op->flag |= OPP_NOTQUOTED; + } + op->flag |= OPP_STRING; + if (op->alloclen <= count + filp->f_pos) { + b = kmalloc (sizeof (openprom_property) + + 2 * (count + filp->f_pos), GFP_KERNEL); + if (!b) + return -ENOMEM; + memcpy (b, filp->private_data, + sizeof (openprom_property) + + strlen (op->name) + op->alloclen); + memset (((char *)b) + sizeof (openprom_property) + + strlen (op->name) + op->alloclen, + 0, 2*(count - filp->f_pos) - op->alloclen); + op = (openprom_property *)b; + op->alloclen = 2*(count + filp->f_pos); + b = 
filp->private_data; + filp->private_data = (void *)op; + kfree (b); + } + p = op->value + filp->f_pos - ((op->flag & OPP_QUOTED) ? 1 : 0); + memcpy (p, buf, count); + op->flag |= OPP_DIRTY; + for (i = 0; i < count; i++, p++) + if (*p == '\n') { + *p = 0; + break; + } + if (i < count) { + op->len = p - op->value; + filp->f_pos += i + 1; + if ((p > op->value) && (op->flag & OPP_QUOTED) + && (*(p - 1) == '\'')) + op->len--; + } else { + if (p - op->value > op->len) + op->len = p - op->value; + filp->f_pos += count; + } + } + return filp->f_pos - k; +} + +int property_release (struct inode *inode, struct file *filp) +{ + openprom_property *op = (openprom_property *)filp->private_data; + unsigned long flags; + int error; + u32 node; + + if (!op) + return 0; + node = nodes[(u16)((uint)inode->u.generic_ip)].node; + if ((u16)((uint)inode->u.generic_ip) == aliases) { + if ((op->flag & OPP_DIRTY) && (op->flag & OPP_STRING)) { + char *p = op->name; + int i = (op->value - op->name) - strlen (op->name) - 1; + op->value [op->len] = 0; + *(op->value - 1) = ' '; + if (i) { + for (p = op->value - i - 2; p >= op->name; p--) + p[i] = *p; + p = op->name + i; + } + memcpy (p - 8, "nvalias ", 8); + prom_feval (p - 8); + } + } else if (op->flag & OPP_DIRTY) { + if (op->flag & OPP_STRING) { + op->value [op->len] = 0; + save_and_cli (flags); + error = prom_setprop (node, op->name, + op->value, op->len + 1); + restore_flags (flags); + if (error <= 0) + printk (KERN_WARNING "/proc/openprom: " + "Couldn't write property %s\n", + op->name); + } else if ((op->flag & OPP_BINARY) || !op->len) { + save_and_cli (flags); + error = prom_setprop (node, op->name, + op->value, op->len); + restore_flags (flags); + if (error <= 0) + printk (KERN_WARNING "/proc/openprom: " + "Couldn't write property %s\n", + op->name); + } else { + printk (KERN_WARNING "/proc/openprom: " + "Unknown property type of %s\n", + op->name); + } + } + kfree (filp->private_data); + return 0; +} + +static struct file_operations 
openpromfs_prop_ops = { + NULL, /* lseek - default */ + property_read, /* read */ + property_write, /* write - bad */ + NULL, /* readdir */ + NULL, /* poll - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + property_release, /* no special release code */ + NULL /* can't fsync */ +}; + +static struct inode_operations openpromfs_prop_inode_ops = { + &openpromfs_prop_ops, /* default property file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static struct file_operations openpromfs_nodenum_ops = { + NULL, /* lseek - default */ + nodenum_read, /* read */ + NULL, /* write - bad */ + NULL, /* readdir */ + NULL, /* poll - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +static struct inode_operations openpromfs_nodenum_inode_ops = { + &openpromfs_nodenum_ops,/* default .node file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static struct file_operations openprom_alias_operations = { + NULL, /* lseek - default */ + NULL, /* read - bad */ + NULL, /* write - bad */ + openpromfs_readdir, /* readdir */ + NULL, /* poll - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +static struct 
inode_operations openprom_alias_inode_operations = { + &openprom_alias_operations,/* default aliases directory file-ops */ + openpromfs_create, /* create */ + openpromfs_lookup, /* lookup */ + NULL, /* link */ + openpromfs_unlink, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int lookup_children(u16 n, const char * name, int len) +{ + int ret; + u16 node; + for (; n != 0xffff; n = nodes[n].next) { + node = nodes[n].child; + if (node != 0xffff) { + char buffer[128]; + int i; + char *p; + + while (node != 0xffff) { + if (prom_getname (nodes[node].node, + buffer, 128) >= 0) { + i = strlen (buffer); + if ((len == i) + && !strncmp (buffer, name, len)) + return NODE2INO(node); + p = strchr (buffer, '@'); + if (p && (len == p - buffer) + && !strncmp (buffer, name, len)) + return NODE2INO(node); + } + node = nodes[node].next; + } + } else + continue; + ret = lookup_children (nodes[n].child, name, len); + if (ret) return ret; + } + return 0; +} + +static int openpromfs_lookup(struct inode * dir, const char * name, int len, + struct inode ** result) +{ + int ino = 0; +#define OPFSL_DIR 0 +#define OPFSL_PROPERTY 1 +#define OPFSL_NODENUM 2 +#define OPFSL_DEVICE 3 + int type = 0; + char buffer[128]; + char *p; + u32 n; + u16 dirnode; + int i; + struct inode *inode; + struct openpromfs_dev *d = NULL; + + *result = NULL; + if (!dir || !S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOTDIR; + } + *result = dir; + if (!len) return 0; + if (name [0] == '.') { + if (len == 1) + return 0; + if (name [1] == '.' 
&& len == 2) { + if (dir->i_ino == PROC_OPENPROM) { + inode = proc_get_inode (dir->i_sb, + PROC_ROOT_INO, + &proc_root); + iput(dir); + if (!inode) + return -EINVAL; + *result = inode; + return 0; + } + ino = NODE2INO(NODE(dir->i_ino).parent); + type = OPFSL_DIR; + } else if (len == 5 && !strncmp (name + 1, "node", 4)) { + ino = NODEP2INO(NODE(dir->i_ino).first_prop); + type = OPFSL_NODENUM; + } + } + if (!ino) { + u16 node = NODE(dir->i_ino).child; + while (node != 0xffff) { + if (prom_getname (nodes[node].node, buffer, 128) >= 0) { + i = strlen (buffer); + if (len == i && !strncmp (buffer, name, len)) { + ino = NODE2INO(node); + type = OPFSL_DIR; + break; + } + p = strchr (buffer, '@'); + if (p && (len == p - buffer) + && !strncmp (buffer, name, len)) { + ino = NODE2INO(node); + type = OPFSL_DIR; + break; + } + } + node = nodes[node].next; + } + } + n = NODE(dir->i_ino).node; + dirnode = dir->i_ino - PROC_OPENPROM_FIRST; + if (!ino) { + int j = NODEP2INO(NODE(dir->i_ino).first_prop); + if (dirnode != aliases) { + for (p = prom_firstprop (n); + p && *p; + p = prom_nextprop (n, p)) { + j++; + if ((len == strlen (p)) + && !strncmp (p, name, len)) { + ino = j; + type = OPFSL_PROPERTY; + break; + } + } + } else { + int k; + for (k = 0; k < aliases_nodes; k++) { + j++; + if (alias_names [k] + && (len == strlen (alias_names [k])) + && !strncmp (alias_names [k], name, len)) { + ino = j; + type = OPFSL_PROPERTY; + break; + } + } + } + } + if (!ino) { + for (d = *devices; d; d = d->next) + if ((d->node == n) && (strlen (d->name) == len) + && !strncmp (d->name, name, len)) { + ino = d->inode; + type = OPFSL_DEVICE; + break; + } + } + if (!ino) { + ino = lookup_children (NODE(dir->i_ino).child, name, len); + if (ino) + type = OPFSL_DIR; + else { + iput(dir); + return -ENOENT; + } + } + inode = proc_get_inode (dir->i_sb, ino, 0); + iput(dir); + if (!inode) + return -EINVAL; + switch (type) { + case OPFSL_DIR: + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + if (ino == 
PROC_OPENPROM_FIRST + aliases) { + inode->i_mode |= S_IWUSR; + inode->i_op = &openprom_alias_inode_operations; + } else + inode->i_op = proc_openprom_iops; + inode->i_nlink = 2; + break; + case OPFSL_NODENUM: + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_op = &openpromfs_nodenum_inode_ops; + inode->i_nlink = 1; + inode->u.generic_ip = (void *)(n); + break; + case OPFSL_PROPERTY: + if ((dirnode == options) && (len == 17) + && !strncmp (name, "security-password", 17)) + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + else { + inode->i_mode = S_IFREG | S_IRUGO; + if (dirnode == options || dirnode == aliases) { + if (len != 4 || strncmp (name, "name", 4)) + inode->i_mode |= S_IWUSR; + } + } + inode->i_op = &openpromfs_prop_inode_ops; + inode->i_nlink = 1; + if (inode->i_size < 0) + inode->i_size = 0; + inode->u.generic_ip = (void *)(((u16)dirnode) | + (((u16)(ino - NODEP2INO(NODE(dir->i_ino).first_prop) - 1)) << 16)); + break; + case OPFSL_DEVICE: + inode->i_mode = d->mode; + inode->i_op = &chrdev_inode_operations; + inode->i_nlink = 1; + inode->i_rdev = d->rdev; + break; + } + *result = inode; + return 0; +} + +static int openpromfs_readdir(struct inode * inode, struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int ino; + u32 n; + int i, j; + char buffer[128]; + u16 node; + char *p; + struct openpromfs_dev *d; + + if (!inode || !S_ISDIR (inode->i_mode)) return -ENOTDIR; + ino = inode->i_ino; + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, ino) < 0) return 0; + i++; + filp->f_pos++; + /* fall thru */ + case 1: + if (filldir(dirent, "..", 2, i, + (NODE(ino).parent == 0xffff) ? 
+ PROC_ROOT_INO : NODE2INO(NODE(ino).parent)) < 0) + return 0; + i++; + filp->f_pos++; + /* fall thru */ + default: + i -= 2; + node = NODE(ino).child; + while (i && node != 0xffff) { + node = nodes[node].next; + i--; + } + while (node != 0xffff) { + if (prom_getname (nodes[node].node, buffer, 128) < 0) + return 0; + if (filldir(dirent, buffer, strlen(buffer), + filp->f_pos, NODE2INO(node)) < 0) + return 0; + filp->f_pos++; + node = nodes[node].next; + } + j = NODEP2INO(NODE(ino).first_prop); + if (!i) { + if (filldir(dirent, ".node", 5, filp->f_pos, j) < 0) + return 0; + filp->f_pos++; + } else + i--; + n = NODE(ino).node; + if (ino == PROC_OPENPROM_FIRST + aliases) { + for (j++; i < aliases_nodes; i++, j++) { + if (alias_names [i]) { + if (filldir (dirent, alias_names [i], + strlen (alias_names [i]), + filp->f_pos, j) < 0) return 0; + filp->f_pos++; + } + } + } else { + for (p = prom_firstprop (n); + p && *p; + p = prom_nextprop (n, p)) { + j++; + if (i) i--; + else { + if (filldir(dirent, p, strlen(p), + filp->f_pos, j) < 0) + return 0; + filp->f_pos++; + } + } + } + for (d = *devices; d; d = d->next) { + if (d->node == n) { + if (i) i--; + else { + if (filldir(dirent, d->name, + strlen(d->name), + filp->f_pos, d->inode) < 0) + return 0; + filp->f_pos++; + } + } + } + } + return 0; +} + +static int openpromfs_create (struct inode *dir, const char *name, int len, + int mode, struct inode **result) +{ + char *p; + struct inode *inode; + + *result = NULL; + if (!dir) + return -ENOENT; + if (len > 256) { + iput (dir); + return -EINVAL; + } + if (aliases_nodes == ALIASES_NNODES) { + iput (dir); + return -EIO; + } + p = kmalloc (len + 1, GFP_KERNEL); + if (!p) { + iput (dir); + return -ENOMEM; + } + strncpy (p, name, len); + p [len] = 0; + alias_names [aliases_nodes++] = p; + inode = proc_get_inode (dir->i_sb, + NODEP2INO(NODE(dir->i_ino).first_prop) + + aliases_nodes, 0); + iput (dir); + if (!inode) + return -EINVAL; + inode->i_mode = S_IFREG | S_IRUGO | S_IWUSR; + 
inode->i_op = &openpromfs_prop_inode_ops; + inode->i_nlink = 1; + if (inode->i_size < 0) inode->i_size = 0; + inode->u.generic_ip = (void *)(((u16)aliases) | + (((u16)(aliases_nodes - 1)) << 16)); + *result = inode; + return 0; +} + +static int openpromfs_unlink (struct inode *dir, const char *name, int len) +{ + char *p; + int i; + + if (!dir) + return -ENOENT; + for (i = 0; i < aliases_nodes; i++) + if ((strlen (alias_names [i]) == len) + && !strncmp (name, alias_names[i], len)) { + char buffer[512]; + + p = alias_names [i]; + alias_names [i] = NULL; + kfree (p); + strcpy (buffer, "nvunalias "); + memcpy (buffer + 10, name, len); + buffer [10 + len] = 0; + prom_feval (buffer); + } + iput (dir); + return 0; +} + +/* {{{ init section */ +#ifndef MODULE +__initfunc(static int check_space (u16 n)) +#else +static int check_space (u16 n) +#endif +{ + unsigned long pages; + + if ((1 << alloced) * PAGE_SIZE < (n + 2) * sizeof(openpromfs_node)) { + pages = __get_free_pages (GFP_KERNEL, alloced + 1, 0); + if (!pages) + return -1; + + if (nodes) { + memcpy ((char *)pages, (char *)nodes, + (1 << alloced) * PAGE_SIZE); + free_pages ((unsigned long)nodes, alloced); + } + alloced++; + nodes = (openpromfs_node *)pages; + } + return 0; +} + +#ifndef MODULE +__initfunc(static u16 get_nodes (u16 parent, u32 node)) +#else +static u16 get_nodes (u16 parent, u32 node) +#endif +{ + char *p; + u16 n = last_node++, i; + + if (check_space (n) < 0) + return 0xffff; + nodes[n].parent = parent; + nodes[n].node = node; + nodes[n].next = 0xffff; + nodes[n].child = 0xffff; + nodes[n].first_prop = first_prop++; + if (!parent) { + char buffer[8]; + int j; + + if ((j = prom_getproperty (node, "name", buffer, 8)) >= 0) { + buffer[j] = 0; + if (!strcmp (buffer, "options")) + options = n; + else if (!strcmp (buffer, "aliases")) + aliases = n; + } + } + if (n != aliases) + for (p = prom_firstprop (node); + p && p != (char *)-1 && *p; + p = prom_nextprop (node, p)) + first_prop++; + else { + char *q; + 
for (p = prom_firstprop (node); + p && p != (char *)-1 && *p; + p = prom_nextprop (node, p)) { + if (aliases_nodes == ALIASES_NNODES) + break; + for (i = 0; i < aliases_nodes; i++) + if (!strcmp (p, alias_names [i])) + break; + if (i < aliases_nodes) + continue; + q = kmalloc (strlen (p) + 1, GFP_KERNEL); + if (!q) + return 0xffff; + strcpy (q, p); + alias_names [aliases_nodes++] = q; + } + first_prop += ALIASES_NNODES; + } + node = prom_getchild (node); + if (node) { + parent = get_nodes (n, node); + if (parent == 0xffff) + return 0xffff; + nodes[n].child = parent; + while ((node = prom_getsibling (node)) != 0) { + i = get_nodes (n, node); + if (i == 0xffff) + return 0xffff; + nodes[parent].next = i; + parent = i; + } + } + return n; +} + + +#ifdef MODULE +void openpromfs_use (struct inode *inode, int inc) +{ + static int root_fresh = 1; + static int dec_first = 1; +#ifdef OPENPROM_DEBUGGING + static int usec = 0; + + if (inc) { + if (inode->i_count == 1) + usec++; + else if (root_fresh && inode->i_ino == PROC_OPENPROM_FIRST) { + root_fresh = 0; + usec++; + } + } else { + if (inode->i_ino == PROC_OPENPROM_FIRST) + root_fresh = 0; + if (!dec_first) + usec--; + } + printk ("openpromfs_use: %d %d %d %d\n", + inode->i_ino, inc, usec, inode->i_count); +#else + if (inc) { + if (inode->i_count == 1) + MOD_INC_USE_COUNT; + else if (root_fresh && inode->i_ino == PROC_OPENPROM_FIRST) { + root_fresh = 0; + MOD_INC_USE_COUNT; + } + } else { + if (inode->i_ino == PROC_OPENPROM_FIRST) + root_fresh = 0; + if (!dec_first) + MOD_DEC_USE_COUNT; + } +#endif + dec_first = 0; +} + +#else +#define openpromfs_use 0 +#endif + +#ifndef MODULE +#define RET(x) +__initfunc(void openpromfs_init (void)) +#else + +EXPORT_NO_SYMBOLS; + +#define RET(x) -x +int init_module (void) +#endif +{ + if (!romvec->pv_romvers) + return RET(ENODEV); + nodes = (openpromfs_node *)__get_free_pages(GFP_KERNEL, 0, 0); + if (!nodes) { + printk (KERN_WARNING "/proc/openprom: can't get free page\n"); + return 
RET(EIO); + } + if (get_nodes (0xffff, prom_root_node) == 0xffff) { + printk (KERN_WARNING "/proc/openprom: couldn't setup tree\n"); + return RET(EIO); + } + nodes[last_node].first_prop = first_prop; + proc_openprom_iops = proc_openprom_register (openpromfs_readdir, + openpromfs_lookup, + openpromfs_use, + &devices); + return RET(0); +} + +#ifdef MODULE +void cleanup_module (void) +{ + int i; + proc_openprom_deregister (); + free_pages ((unsigned long)nodes, alloced); + for (i = 0; i < aliases_nodes; i++) + if (alias_names [i]) + kfree (alias_names [i]); + nodes = NULL; +} +#endif diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c new file mode 100644 index 000000000..470b7bffb --- /dev/null +++ b/fs/proc/proc_tty.c @@ -0,0 +1,186 @@ +/* + * proc_tty.c -- handles /proc/tty + * + * Copyright 1997, Theodore Ts'o + */ + +#include <asm/uaccess.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/tty.h> +#include <asm/bitops.h> + +extern struct tty_driver *tty_drivers; /* linked list of tty drivers */ +extern struct tty_ldisc ldiscs[]; + + +static int tty_drivers_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int tty_ldiscs_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data); + +/* + * The /proc/tty directory inodes... 
+ */ +static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver; + +/* + * This is the handler for /proc/tty/drivers + */ +static int tty_drivers_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = 0; + off_t begin = 0; + struct tty_driver *p; + char range[20], deftype[20]; + char *type; + + for (p = tty_drivers; p; p = p->next) { + if (p->num > 1) + sprintf(range, "%d-%d", p->minor_start, + p->minor_start + p->num - 1); + else + sprintf(range, "%d", p->minor_start); + switch (p->type) { + case TTY_DRIVER_TYPE_SYSTEM: + if (p->subtype == SYSTEM_TYPE_TTY) + type = "system:/dev/tty"; + else if (p->subtype == SYSTEM_TYPE_CONSOLE) + type = "system:console"; + else + type = "system"; + break; + case TTY_DRIVER_TYPE_CONSOLE: + type = "console"; + break; + case TTY_DRIVER_TYPE_SERIAL: + if (p->subtype == 2) + type = "serial:callout"; + else + type = "serial"; + break; + case TTY_DRIVER_TYPE_PTY: + if (p->subtype == PTY_TYPE_MASTER) + type = "pty:master"; + else if (p->subtype == PTY_TYPE_SLAVE) + type = "pty:slave"; + else + type = "pty"; + break; + default: + sprintf(deftype, "type:%d.%d", p->type, p->subtype); + type = deftype; + break; + } + len += sprintf(page+len, "%-20s /dev/%-8s %3d %7s %s\n", + p->driver_name ? p->driver_name : "", + p->name, p->major, range, type); + if (len+begin > off+count) + break; + if (len+begin < off) { + begin += len; + len = 0; + } + } + if (!p) + *eof = 1; + if (off >= len+begin) + return 0; + *start = page + (begin-off); + return ((count < begin+len-off) ? count : begin+len-off); +} + +/* + * This is the handler for /proc/tty/ldiscs + */ +static int tty_ldiscs_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int i; + int len = 0; + off_t begin = 0; + + for (i=0; i < NR_LDISCS; i++) { + if (!(ldiscs[i].flags & LDISC_FLAG_DEFINED)) + continue; + len += sprintf(page+len, "%-10s %2d\n", + ldiscs[i].name ? 
ldiscs[i].name : "???", i); + if (len+begin > off+count) + break; + if (len+begin < off) { + begin += len; + len = 0; + } + } + if (i >= NR_LDISCS) + *eof = 1; + if (off >= len+begin) + return 0; + *start = page + (begin-off); + return ((count < begin+len-off) ? count : begin+len-off); +} + +/* + * Thsi function is called by register_tty_driver() to handle + * registering the driver's /proc handler into /proc/tty/driver/<foo> + */ +void proc_tty_register_driver(struct tty_driver *driver) +{ + struct proc_dir_entry *ent; + + if ((!driver->read_proc && !driver->write_proc) || + !driver->driver_name || + driver->proc_entry) + return; + + ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); + if (!ent) + return; + ent->read_proc = driver->read_proc; + ent->write_proc = driver->write_proc; + ent->data = driver; + + driver->proc_entry = ent; +} + +/* + * This function is called by unregister_tty_driver() + */ +void proc_tty_unregister_driver(struct tty_driver *driver) +{ + struct proc_dir_entry *ent; + + ent = driver->proc_entry; + if (!ent) + return; + + proc_unregister(proc_tty_driver, ent->low_ino); + + driver->proc_entry = 0; + kfree(ent); +} + +/* + * Called by proc_root_init() to initialize the /proc/tty subtree + */ +void proc_tty_init(void) +{ + struct proc_dir_entry *ent; + + ent = create_proc_entry("tty", S_IFDIR, 0); + if (!ent) + return; + proc_tty_ldisc = create_proc_entry("tty/ldisc", S_IFDIR, 0); + proc_tty_driver = create_proc_entry("tty/driver", S_IFDIR, 0); + + ent = create_proc_entry("tty/ldiscs", 0, 0); + ent->read_proc = tty_ldiscs_read_proc; + + ent = create_proc_entry("tty/drivers", 0, 0); + ent->read_proc = tty_drivers_read_proc; +} + diff --git a/fs/proc/procfs_syms.c b/fs/proc/procfs_syms.c index 7a538e240..809a26084 100644 --- a/fs/proc/procfs_syms.c +++ b/fs/proc/procfs_syms.c @@ -1,3 +1,4 @@ +#include <linux/config.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/proc_fs.h> @@ -8,30 +9,32 @@ * in the procfs. 
*/ extern int (* dispatch_scsi_info_ptr) (int ino, char *buffer, char **start, - off_t offset, int length, int inout); + off_t offset, int length, int inout); extern struct inode_operations proc_scsi_inode_operations; -static struct symbol_table procfs_syms = { -/* Should this be surrounded with "#ifdef CONFIG_MODULES" ? */ -#include <linux/symtab_begin.h> - X(proc_register), - X(proc_register_dynamic), - X(proc_unregister), - X(proc_root), - X(in_group_p), - X(generate_cluster), - X(proc_net_inode_operations), - X(proc_net), +EXPORT_SYMBOL(proc_register); +EXPORT_SYMBOL(proc_unregister); +EXPORT_SYMBOL(create_proc_entry); +EXPORT_SYMBOL(remove_proc_entry); +EXPORT_SYMBOL(proc_root); +EXPORT_SYMBOL(proc_get_inode); +EXPORT_SYMBOL(in_group_p); +EXPORT_SYMBOL(proc_dir_inode_operations); +EXPORT_SYMBOL(proc_net_inode_operations); +EXPORT_SYMBOL(proc_net); - /* - * This is required so that if we load scsi later, that the - * scsi code can attach to /proc/scsi in the correct manner. - */ - X(proc_scsi), - X(proc_scsi_inode_operations), - X(dispatch_scsi_info_ptr), -#include <linux/symtab_end.h> -}; +/* + * This is required so that if we load scsi later, that the + * scsi code can attach to /proc/scsi in the correct manner. 
+ */ +EXPORT_SYMBOL(proc_scsi); +EXPORT_SYMBOL(proc_scsi_inode_operations); +EXPORT_SYMBOL(dispatch_scsi_info_ptr); + +#if defined(CONFIG_SUN_OPENPROMFS_MODULE) +EXPORT_SYMBOL(proc_openprom_register); +EXPORT_SYMBOL(proc_openprom_deregister); +#endif static struct file_system_type proc_fs_type = { proc_read_super, "proc", 0, NULL @@ -39,10 +42,5 @@ static struct file_system_type proc_fs_type = { int init_proc_fs(void) { - int status; - - if ((status = register_filesystem(&proc_fs_type)) == 0) - status = register_symtab(&procfs_syms); - return status; + return register_filesystem(&proc_fs_type) == 0; } - diff --git a/fs/proc/root.c b/fs/proc/root.c index 70150f587..e5bb9d51b 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -14,6 +14,9 @@ #include <linux/stat.h> #include <linux/config.h> #include <asm/bitops.h> +#ifdef CONFIG_KERNELD +#include <linux/kerneld.h> +#endif /* * Offset of the first process in the /proc root directory.. @@ -38,7 +41,7 @@ static struct file_operations proc_dir_operations = { NULL, /* read - bad */ NULL, /* write - bad */ proc_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ @@ -79,7 +82,7 @@ static struct file_operations proc_root_operations = { NULL, /* read - bad */ NULL, /* write - bad */ proc_root_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ @@ -122,23 +125,19 @@ struct proc_dir_entry proc_root = { &proc_root, NULL }; -struct proc_dir_entry proc_net = { - PROC_NET, 3, "net", - S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0, - 0, &proc_dir_inode_operations, - NULL, NULL, - NULL, - NULL, NULL -}; +struct proc_dir_entry *proc_net, *proc_scsi; -struct proc_dir_entry proc_scsi = { - PROC_SCSI, 4, "scsi", +#ifdef CONFIG_MCA +struct proc_dir_entry proc_mca = { + PROC_MCA, 3, "mca", S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0, 0, 
&proc_dir_inode_operations, NULL, NULL, NULL, &proc_root, NULL }; +#endif +#ifdef CONFIG_SYSCTL struct proc_dir_entry proc_sys_root = { PROC_SYS, 3, "sys", /* inode, name */ S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0, /* mode, nlink, uid, gid */ @@ -147,14 +146,181 @@ struct proc_dir_entry proc_sys_root = { NULL, /* next */ NULL, NULL /* parent, subdir */ }; +#endif + +#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) + +static int (*proc_openprom_defreaddir_ptr)(struct inode *, struct file *, void *, filldir_t); +static int (*proc_openprom_deflookup_ptr)(struct inode *, const char *, int, struct inode **); +void (*proc_openprom_use)(struct inode *, int) = 0; +static struct openpromfs_dev *proc_openprom_devices = NULL; +static ino_t proc_openpromdev_ino = PROC_OPENPROMD_FIRST; + +struct inode_operations * +proc_openprom_register(int (*readdir)(struct inode *, struct file *, void *, filldir_t), + int (*lookup)(struct inode *, const char *, int, struct inode **), + void (*use)(struct inode *, int), + struct openpromfs_dev ***devices) +{ + proc_openprom_defreaddir_ptr = (proc_openprom_inode_operations.default_file_ops)->readdir; + proc_openprom_deflookup_ptr = proc_openprom_inode_operations.lookup; + (proc_openprom_inode_operations.default_file_ops)->readdir = readdir; + proc_openprom_inode_operations.lookup = lookup; + proc_openprom_use = use; + *devices = &proc_openprom_devices; + return &proc_openprom_inode_operations; +} + +int proc_openprom_regdev(struct openpromfs_dev *d) +{ + if (proc_openpromdev_ino == PROC_OPENPROMD_FIRST + PROC_NOPENPROMD) return -1; + d->next = proc_openprom_devices; + d->inode = proc_openpromdev_ino++; + proc_openprom_devices = d; + return 0; +} + +int proc_openprom_unregdev(struct openpromfs_dev *d) +{ + if (d == proc_openprom_devices) { + proc_openprom_devices = d->next; + } else if (!proc_openprom_devices) + return -1; + else { + struct openpromfs_dev *p; + + for (p = proc_openprom_devices; p->next != d && p->next; p 
= p->next); + if (!p->next) return -1; + p->next = d->next; + } + return 0; +} + +#ifdef CONFIG_SUN_OPENPROMFS_MODULE +void +proc_openprom_deregister(void) +{ + (proc_openprom_inode_operations.default_file_ops)->readdir = proc_openprom_defreaddir_ptr; + proc_openprom_inode_operations.lookup = proc_openprom_deflookup_ptr; + proc_openprom_use = 0; +} +#endif + +#if defined(CONFIG_SUN_OPENPROMFS_MODULE) && defined(CONFIG_KERNELD) +static int +proc_openprom_defreaddir(struct inode * inode, struct file * filp, + void * dirent, filldir_t filldir) +{ + request_module("openpromfs"); + if ((proc_openprom_inode_operations.default_file_ops)->readdir != + proc_openprom_defreaddir) + return (proc_openprom_inode_operations.default_file_ops)->readdir + (inode, filp, dirent, filldir); + return -EINVAL; +} + +static int +proc_openprom_deflookup(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + request_module("openpromfs"); + if (proc_openprom_inode_operations.lookup != + proc_openprom_deflookup) + return proc_openprom_inode_operations.lookup + (dir, name, len, result); + iput(dir); + return -ENOENT; +} +#endif + +static struct file_operations proc_openprom_operations = { + NULL, /* lseek - default */ + NULL, /* read - bad */ + NULL, /* write - bad */ +#if defined(CONFIG_SUN_OPENPROMFS_MODULE) && defined(CONFIG_KERNELD) + proc_openprom_defreaddir,/* readdir */ +#else + NULL, /* readdir */ +#endif + NULL, /* poll - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +struct inode_operations proc_openprom_inode_operations = { + &proc_openprom_operations,/* default net directory file-ops */ + NULL, /* create */ +#if defined(CONFIG_SUN_OPENPROMFS_MODULE) && defined(CONFIG_KERNELD) + proc_openprom_deflookup,/* lookup */ +#else + NULL, /* lookup */ +#endif + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* 
rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +struct proc_dir_entry proc_openprom = { + PROC_OPENPROM, 8, "openprom", + S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0, + 0, &proc_openprom_inode_operations, + NULL, NULL, + NULL, + &proc_root, NULL +}; + +extern void openpromfs_init (void); +#endif /* CONFIG_SUN_OPENPROMFS */ + +static int make_inode_number(void) +{ + int i = find_first_zero_bit((void *) proc_alloc_map, PROC_NDYNAMIC); + if (i<0 || i>=PROC_NDYNAMIC) + return -1; + set_bit(i, (void *) proc_alloc_map); + return PROC_DYNAMIC_FIRST + i; +} int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { + int i; + + if (dp->low_ino == 0) { + i = make_inode_number(); + if (i < 0) + return -EAGAIN; + dp->low_ino = i; + } dp->next = dir->subdir; dp->parent = dir; dir->subdir = dp; - if (S_ISDIR(dp->mode)) + if (S_ISDIR(dp->mode)) { + if (dp->ops == NULL) + dp->ops = &proc_dir_inode_operations; dir->nlink++; + } else { + if (dp->ops == NULL) + dp->ops = &proc_file_inode_operations; + } + /* + * kludge until we fixup the md device driver + */ + if (dp->low_ino == PROC_MD) + dp->ops = &proc_array_inode_operations; return 0; } @@ -179,30 +345,6 @@ int proc_unregister(struct proc_dir_entry * dir, int ino) return -EINVAL; } -static int make_inode_number(void) -{ - int i = find_first_zero_bit((void *) proc_alloc_map, PROC_NDYNAMIC); - if (i<0 || i>=PROC_NDYNAMIC) - return -1; - set_bit(i, (void *) proc_alloc_map); - return PROC_DYNAMIC_FIRST + i; -} - -int proc_register_dynamic(struct proc_dir_entry * dir, - struct proc_dir_entry * dp) -{ - int i = make_inode_number(); - if (i < 0) - return -EAGAIN; - dp->low_ino = i; - dp->next = dir->subdir; - dp->parent = dir; - dir->subdir = dp; - if (S_ISDIR(dp->mode)) - dir->nlink++; - return 0; -} - /* * /proc/self: */ @@ -223,7 +365,7 @@ static int 
proc_self_readlink(struct inode * inode, char * buffer, int buflen) char tmp[30]; iput(inode); - len = 1 + sprintf(tmp, "%d", current->pid); + len = sprintf(tmp, "%d", current->pid); if (buflen < len) len = buflen; copy_to_user(buffer, tmp, len); @@ -253,32 +395,46 @@ static struct inode_operations proc_self_inode_operations = { static struct proc_dir_entry proc_root_loadavg = { PROC_LOADAVG, 7, "loadavg", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_uptime = { PROC_UPTIME, 6, "uptime", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_meminfo = { PROC_MEMINFO, 7, "meminfo", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_kmsg = { PROC_KMSG, 4, "kmsg", S_IFREG | S_IRUSR, 1, 0, 0, + 0, &proc_kmsg_inode_operations }; static struct proc_dir_entry proc_root_version = { PROC_VERSION, 7, "version", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #ifdef CONFIG_PCI static struct proc_dir_entry proc_root_pci = { PROC_PCI, 3, "pci", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations +}; +#endif +#ifdef CONFIG_ZORRO +static struct proc_dir_entry proc_root_zorro = { + PROC_ZORRO, 5, "zorro", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #endif static struct proc_dir_entry proc_root_cpuinfo = { PROC_CPUINFO, 7, "cpuinfo", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_self = { PROC_SELF, 4, "self", @@ -289,82 +445,103 @@ static struct proc_dir_entry proc_root_self = { static struct proc_dir_entry proc_root_malloc = { PROC_MALLOC, 6, "malloc", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #endif static struct proc_dir_entry proc_root_kcore = { PROC_KCORE, 5, "kcore", S_IFREG | S_IRUSR, 1, 0, 0, + 0, &proc_kcore_inode_operations }; #ifdef CONFIG_MODULES static struct proc_dir_entry 
proc_root_modules = { PROC_MODULES, 7, "modules", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_ksyms = { PROC_KSYMS, 5, "ksyms", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #endif static struct proc_dir_entry proc_root_stat = { PROC_STAT, 4, "stat", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_devices = { PROC_DEVICES, 7, "devices", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_interrupts = { PROC_INTERRUPTS, 10,"interrupts", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #ifdef __SMP_PROF__ static struct proc_dir_entry proc_root_smp = { PROC_SMP_PROF, 3,"smp", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #endif static struct proc_dir_entry proc_root_filesystems = { PROC_FILESYSTEMS, 11,"filesystems", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_dma = { PROC_DMA, 3, "dma", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_ioports = { PROC_IOPORTS, 7, "ioports", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_cmdline = { PROC_CMDLINE, 7, "cmdline", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #ifdef CONFIG_RTC static struct proc_dir_entry proc_root_rtc = { PROC_RTC, 3, "rtc", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; #endif static struct proc_dir_entry proc_root_locks = { PROC_LOCKS, 5, "locks", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; static struct proc_dir_entry proc_root_mounts = { PROC_MTAB, 6, "mounts", S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations +}; +static struct proc_dir_entry proc_root_swaps = { + PROC_SWAP, 5, "swaps", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, 
&proc_array_inode_operations }; static struct proc_dir_entry proc_root_profile = { PROC_PROFILE, 7, "profile", S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, + 0, &proc_profile_inode_operations +}; +static struct proc_dir_entry proc_root_slab = { + PROC_SLABINFO, 8, "slabinfo", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations }; void proc_root_init(void) { - static int done = 0; - - if (done) - return; - done = 1; proc_base_init(); proc_register(&proc_root, &proc_root_loadavg); proc_register(&proc_root, &proc_root_uptime); @@ -374,16 +551,25 @@ void proc_root_init(void) #ifdef CONFIG_PCI proc_register(&proc_root, &proc_root_pci); #endif +#ifdef CONFIG_ZORRO + proc_register(&proc_root, &proc_root_zorro); +#endif proc_register(&proc_root, &proc_root_cpuinfo); proc_register(&proc_root, &proc_root_self); - proc_register(&proc_root, &proc_net); - proc_register(&proc_root, &proc_scsi); + proc_net = create_proc_entry("net", S_IFDIR, 0); + proc_scsi = create_proc_entry("scsi", S_IFDIR, 0); +#ifdef CONFIG_SYSCTL proc_register(&proc_root, &proc_sys_root); +#endif +#ifdef CONFIG_MCA + proc_register(&proc_root, &proc_mca); +#endif #ifdef CONFIG_DEBUG_MALLOC proc_register(&proc_root, &proc_root_malloc); #endif proc_register(&proc_root, &proc_root_kcore); + proc_root_kcore.size = (MAP_NR(high_memory) << PAGE_SHIFT) + PAGE_SIZE; #ifdef CONFIG_MODULES proc_register(&proc_root, &proc_root_modules); @@ -405,10 +591,23 @@ void proc_root_init(void) proc_register(&proc_root, &proc_root_locks); proc_register(&proc_root, &proc_root_mounts); + proc_register(&proc_root, &proc_root_swaps); + +#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) +#ifdef CONFIG_SUN_OPENPROMFS + openpromfs_init (); +#endif + proc_register(&proc_root, &proc_openprom); +#endif + proc_register(&proc_root, &proc_root_slab); + if (prof_shift) { proc_register(&proc_root, &proc_root_profile); + proc_root_profile.size = (1+prof_len) * sizeof(unsigned long); } + + proc_tty_init(); } @@ 
-485,8 +684,17 @@ static int proc_root_lookup(struct inode * dir,const char * name, int len, { unsigned int pid, c; int i, ino, retval; + struct task_struct *p; dir->i_count++; + + if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */ + dir->i_nlink = proc_root.nlink; + for_each_task(p) + if (p && p->pid) + dir->i_nlink++; + } + retval = proc_lookup(dir, name, len, result); if (retval != -ENOENT) { iput(dir); diff --git a/fs/proc/scsi.c b/fs/proc/scsi.c index 447314eab..b1e77398c 100644 --- a/fs/proc/scsi.c +++ b/fs/proc/scsi.c @@ -46,7 +46,7 @@ static struct file_operations proc_scsi_operations = { proc_readscsi, /* read */ proc_writescsi, /* write */ proc_readdir, /* readdir */ - NULL, /* select */ + NULL, /* poll */ NULL, /* ioctl */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/read_write.c b/fs/read_write.c index 6c28b8f59..dd4092301 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -13,6 +13,9 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/uio.h> +#include <linux/malloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -59,6 +62,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset, unsigned int origin) struct file * file; struct inode * inode; + lock_kernel(); retval = -EBADF; if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || @@ -69,6 +73,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset, unsigned int origin) goto bad; retval = llseek(inode, file, offset, origin); bad: + unlock_kernel(); return retval; } @@ -81,6 +86,7 @@ asmlinkage int sys_llseek(unsigned int fd, unsigned long offset_high, struct inode * inode; long long offset; + lock_kernel(); retval = -EBADF; if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || @@ -100,8 +106,8 @@ asmlinkage int sys_llseek(unsigned int fd, unsigned long offset_high, if (retval) retval = -EFAULT; } - bad: + unlock_kernel(); return retval; } @@ -112,6 +118,7 @@ asmlinkage long sys_read(unsigned int fd, char * 
buf, unsigned long count) struct inode * inode; long (*read)(struct inode *, struct file *, char *, unsigned long); + lock_kernel(); error = -EBADF; file = fget(fd); if (!file) @@ -132,6 +139,7 @@ asmlinkage long sys_read(unsigned int fd, char * buf, unsigned long count) out: fput(file, inode); bad_file: + unlock_kernel(); return error; } @@ -142,6 +150,7 @@ asmlinkage long sys_write(unsigned int fd, const char * buf, unsigned long count struct inode * inode; long (*write)(struct inode *, struct file *, const char *, unsigned long); + lock_kernel(); error = -EBADF; file = fget(fd); if (!file) @@ -163,44 +172,18 @@ asmlinkage long sys_write(unsigned int fd, const char * buf, unsigned long count out: fput(file, inode); bad_file: + unlock_kernel(); return error; } -static long sock_readv_writev(int type, struct inode * inode, struct file * file, - const struct iovec * iov, long count, long size) -{ - struct msghdr msg; - struct socket *sock; - - sock = &inode->u.socket_i; - if (!sock->ops) - return -EOPNOTSUPP; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_iov = (struct iovec *) iov; - msg.msg_iovlen = count; - - /* read() does a VERIFY_WRITE */ - if (type == VERIFY_WRITE) { - if (!sock->ops->recvmsg) - return -EOPNOTSUPP; - return sock->ops->recvmsg(sock, &msg, size, - (file->f_flags & O_NONBLOCK), 0, NULL); - } - if (!sock->ops->sendmsg) - return -EOPNOTSUPP; - return sock->ops->sendmsg(sock, &msg, size, - (file->f_flags & O_NONBLOCK), 0); -} - typedef long (*IO_fn_t)(struct inode *, struct file *, char *, unsigned long); static long do_readv_writev(int type, struct inode * inode, struct file * file, const struct iovec * vector, unsigned long count) { unsigned long tot_len; - struct iovec iov[UIO_MAXIOV]; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov=iovstack; long retval, i; IO_fn_t fn; @@ -212,27 +195,46 @@ static long do_readv_writev(int type, struct inode * inode, struct file * file, return 0; if (count > 
UIO_MAXIOV) return -EINVAL; - if (copy_from_user(iov, vector, count*sizeof(*vector))) + if (count > UIO_FASTIOV) { + iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); + if (!iov) + return -ENOMEM; + } + if (copy_from_user(iov, vector, count*sizeof(*vector))) { + if (iov != iovstack) + kfree(iov); return -EFAULT; + } tot_len = 0; for (i = 0 ; i < count ; i++) tot_len += iov[i].iov_len; retval = locks_verify_area(type == VERIFY_READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, file->f_pos, tot_len); - if (retval) + if (retval) { + if (iov != iovstack) + kfree(iov); return retval; + } /* * Then do the actual IO. Note that sockets need to be handled * specially as they have atomicity guarantees and can handle * iovec's natively */ - if (inode->i_sock) - return sock_readv_writev(type, inode, file, iov, count, tot_len); + if (inode->i_sock) { + int err; + err = sock_readv_writev(type, inode, file, iov, count, tot_len); + if (iov != iovstack) + kfree(iov); + return err; + } - if (!file->f_op) + if (!file->f_op) { + if (iov != iovstack) + kfree(iov); return -EINVAL; + } /* VERIFY_WRITE actually means a read, as we write to user space */ fn = file->f_op->read; if (type == VERIFY_READ) @@ -257,6 +259,8 @@ static long do_readv_writev(int type, struct inode * inode, struct file * file, if (nr != len) break; } + if (iov != iovstack) + kfree(iov); return retval; } @@ -264,26 +268,34 @@ asmlinkage long sys_readv(unsigned long fd, const struct iovec * vector, unsigne { struct file * file; struct inode * inode; + long err = -EBADF; - if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode)) - return -EBADF; + lock_kernel(); + if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode=file->f_inode)) + goto out; if (!(file->f_mode & 1)) - return -EBADF; - return do_readv_writev(VERIFY_WRITE, inode, file, vector, count); + goto out; + err = do_readv_writev(VERIFY_WRITE, inode, file, vector, count); +out: + unlock_kernel(); + return err; } 
asmlinkage long sys_writev(unsigned long fd, const struct iovec * vector, unsigned long count) { - int error; + int error = -EBADF; struct file * file; struct inode * inode; - if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode)) - return -EBADF; + lock_kernel(); + if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode=file->f_inode)) + goto out; if (!(file->f_mode & 2)) - return -EBADF; + goto out; down(&inode->i_sem); error = do_readv_writev(VERIFY_READ, inode, file, vector, count); up(&inode->i_sem); +out: + unlock_kernel(); return error; } diff --git a/fs/readdir.c b/fs/readdir.c index e81415644..aaea5b45f 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -10,6 +10,8 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -55,23 +57,28 @@ static int fillonedir(void * __buf, const char * name, int namlen, off_t offset, asmlinkage int old_readdir(unsigned int fd, void * dirent, unsigned int count) { - int error; + int error = -EBADF; struct file * file; struct readdir_callback buf; + lock_kernel(); if (fd >= NR_OPEN || !(file = current->files->fd[fd])) - return -EBADF; + goto out; + error = -ENOTDIR; if (!file->f_op || !file->f_op->readdir) - return -ENOTDIR; + goto out; error = verify_area(VERIFY_WRITE, dirent, sizeof(struct old_linux_dirent)); if (error) - return error; + goto out; buf.count = 0; buf.dirent = dirent; error = file->f_op->readdir(file->f_inode, file, &buf, fillonedir); if (error < 0) - return error; - return buf.count; + goto out; + error = buf.count; +out: + unlock_kernel(); + return error; } /* @@ -121,25 +128,32 @@ asmlinkage int sys_getdents(unsigned int fd, void * dirent, unsigned int count) struct file * file; struct linux_dirent * lastdirent; struct getdents_callback buf; - int error; + int error = -EBADF; + lock_kernel(); if (fd >= NR_OPEN || !(file = current->files->fd[fd])) - return -EBADF; + goto 
out; + error = -ENOTDIR; if (!file->f_op || !file->f_op->readdir) - return -ENOTDIR; + goto out; error = verify_area(VERIFY_WRITE, dirent, count); if (error) - return error; + goto out; buf.current_dir = (struct linux_dirent *) dirent; buf.previous = NULL; buf.count = count; buf.error = 0; error = file->f_op->readdir(file->f_inode, file, &buf, filldir); if (error < 0) - return error; + goto out; lastdirent = buf.previous; - if (!lastdirent) - return buf.error; - put_user(file->f_pos, &lastdirent->d_off); - return count - buf.count; + if (!lastdirent) { + error = buf.error; + } else { + put_user(file->f_pos, &lastdirent->d_off); + error = count - buf.count; + } +out: + unlock_kernel(); + return error; } diff --git a/fs/ext/Makefile b/fs/romfs/Makefile index c467a4870..70fd45a4e 100644 --- a/fs/ext/Makefile +++ b/fs/romfs/Makefile @@ -1,5 +1,5 @@ # -# Makefile for the linux ext-filesystem routines. +# Makefile for the linux romfs-filesystem routines. # # Note! Dependencies are done automagically by 'make dep', which also # removes any old dependencies. DON'T put your own dependencies here @@ -7,9 +7,8 @@ # # Note 2! The CFLAGS definitions are now in the main makefile... 
-O_TARGET := ext.o -O_OBJS := freelists.o truncate.o namei.o inode.o file.o dir.o \ - symlink.o fsync.o +O_TARGET := romfs.o +O_OBJS := inode.o M_OBJS := $(O_TARGET) include $(TOPDIR)/Rules.make diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c new file mode 100644 index 000000000..f3f73c66e --- /dev/null +++ b/fs/romfs/inode.c @@ -0,0 +1,666 @@ +/* + * ROMFS file system, Linux implementation + * + * Copyright (C) 1997 Janos Farkas <chexum@shadow.banki.hu> + * + * Using parts of the minix filesystem + * Copyright (C) 1991, 1992 Linus Torvalds + * + * and parts of the affs filesystem additionally + * Copyright (C) 1993 Ray Burr + * Copyright (C) 1996 Hans-Joachim Widmaier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Changes + * Changed for 2.1.19 modules + * Jan 1997 Initial release + */ + +/* todo: + * use malloced memory for file names? + * considering write access... + * network (tftp) files? + */ + +/* + * Sorry about some optimizations and for some goto's. I just wanted + * to squeeze some more bytes out of this code.. :) + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/malloc.h> +#include <linux/romfs_fs.h> +#include <linux/fs.h> +#include <linux/locks.h> +#include <linux/init.h> + +#include <asm/uaccess.h> + +static int inline min(int a, int b) +{ + return a<b ? 
a : b; +} + +static __s32 +romfs_checksum(void *data, int size) +{ + __s32 sum, *ptr; + + sum = 0; ptr = data; + size>>=2; + while (size>0) { + sum += ntohl(*ptr++); + size--; + } + return sum; +} + +static struct super_operations romfs_ops; + +static struct super_block * +romfs_read_super(struct super_block *s, void *data, int silent) +{ + struct buffer_head *bh; + kdev_t dev = s->s_dev; + struct romfs_super_block *rsb; + int sz; + + MOD_INC_USE_COUNT; + + /* I would parse the options, but there are none.. :) */ + + lock_super(s); + set_blocksize(dev, ROMBSIZE); + s->s_blocksize = ROMBSIZE; + s->s_blocksize_bits = ROMBSBITS; + bh = bread(dev, 0, ROMBSIZE); + if (!bh) { + printk ("romfs: unable to read superblock\n"); + goto outnobh; + } + + rsb = (struct romfs_super_block *)bh->b_data; + sz = ntohl(rsb->size); + if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 + || sz < ROMFH_SIZE) { + if (!silent) + printk ("VFS: Can't find a romfs filesystem on dev " + "%s.\n", kdevname(dev)); + goto out; + } + if (romfs_checksum(rsb, min(sz,512))) { + printk ("romfs: bad initial checksum on dev " + "%s.\n", kdevname(dev)); + } + + s->s_magic = ROMFS_MAGIC; + s->u.romfs_sb.s_maxsize = sz; + + s->s_flags |= MS_RDONLY; + + /* Find the start of the fs */ + sz = (ROMFH_SIZE + + strnlen(rsb->name, ROMFS_MAXFN) + 1 + ROMFH_PAD) + & ROMFH_MASK; + + brelse(bh); + + s->s_op = &romfs_ops; + + unlock_super(s); + + if (!(s->s_mounted = iget(s, sz))) + goto outnobh; + + /* Ehrhm; sorry.. :) And thanks to Hans-Joachim Widmaier :) */ + if (0) { +out: + brelse(bh); +outnobh: + s->s_dev = 0; + unlock_super(s); + MOD_DEC_USE_COUNT; + s = NULL; + } + + return s; +} + +/* Nothing to do.. */ + +static void +romfs_put_super(struct super_block *sb) +{ + lock_super(sb); + sb->s_dev = 0; + unlock_super(sb); + MOD_DEC_USE_COUNT; + return; +} + + +/* That's simple too. 
*/ + +static void +romfs_statfs(struct super_block *sb, struct statfs *buf, int bufsize) +{ + struct statfs tmp; + + memset(&tmp, 0, sizeof(tmp)); + tmp.f_type = ROMFS_MAGIC; + tmp.f_bsize = ROMBSIZE; + tmp.f_blocks = (sb->u.romfs_sb.s_maxsize+ROMBSIZE-1)>>ROMBSBITS; + copy_to_user(buf, &tmp, bufsize); +} + +static int +romfs_strnlen(struct inode *i, unsigned long offset, unsigned long count) +{ + struct buffer_head *bh; + unsigned long avail, maxsize, res; + + maxsize = i->i_sb->u.romfs_sb.s_maxsize; + if (offset >= maxsize) + return -1; + + /* strnlen is almost always valid */ + if (count > maxsize || offset+count > maxsize) + count = maxsize-offset; + + bh = bread(i->i_dev, offset>>ROMBSBITS, ROMBSIZE); + if (!bh) + return -1; /* error */ + + avail = ROMBSIZE - (offset & ROMBMASK); + maxsize = min(count, avail); + res = strnlen(((char *)bh->b_data)+(offset&ROMBMASK), maxsize); + brelse(bh); + + if (res < maxsize) + return res; /* found all of it */ + + while (res < count) { + offset += maxsize; + + bh = bread(i->i_dev, offset>>ROMBSBITS, ROMBSIZE); + if (!bh) + return -1; + maxsize = min(count-res, ROMBSIZE); + avail = strnlen(bh->b_data, maxsize); + res += avail; + brelse(bh); + if (avail < maxsize) + return res; + } + return res; +} + +static int +romfs_copyfrom(struct inode *i, void *dest, unsigned long offset, unsigned long count) +{ + struct buffer_head *bh; + unsigned long avail, maxsize, res; + + maxsize = i->i_sb->u.romfs_sb.s_maxsize; + if (offset >= maxsize || count > maxsize || offset+count>maxsize) + return -1; + + bh = bread(i->i_dev, offset>>ROMBSBITS, ROMBSIZE); + if (!bh) + return -1; /* error */ + + avail = ROMBSIZE - (offset & ROMBMASK); + maxsize = min(count, avail); + memcpy(dest, ((char *)bh->b_data) + (offset & ROMBMASK), maxsize); + brelse(bh); + + res = maxsize; /* all of it */ + + while (res < count) { + offset += maxsize; + dest += maxsize; + + bh = bread(i->i_dev, offset>>ROMBSBITS, ROMBSIZE); + if (!bh) + return -1; + maxsize = 
min(count-res, ROMBSIZE); + memcpy(dest, bh->b_data, maxsize); + brelse(bh); + res += maxsize; + } + return res; +} + +/* Directory operations */ + +static int +romfs_readdir(struct inode *i, struct file *filp, void *dirent, filldir_t filldir) +{ + struct romfs_inode ri; + unsigned long offset, maxoff; + int j, ino, nextfh; + int stored = 0; + char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ + + if (!i || !S_ISDIR(i->i_mode)) + return -EBADF; + + maxoff = i->i_sb->u.romfs_sb.s_maxsize; + + offset = filp->f_pos; + if (!offset) { + offset = i->i_ino & ROMFH_MASK; + if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0) + return stored; + offset = ntohl(ri.spec) & ROMFH_MASK; + } + + /* Not really failsafe, but we are read-only... */ + for(;;) { + if (!offset || offset >= maxoff) { + offset = 0xffffffff; + filp->f_pos = offset; + return stored; + } + filp->f_pos = offset; + + /* Fetch inode info */ + if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0) + return stored; + + j = romfs_strnlen(i, offset+ROMFH_SIZE, sizeof(fsname)-1); + if (j < 0) + return stored; + + fsname[j]=0; + romfs_copyfrom(i, fsname, offset+ROMFH_SIZE, j); + + ino = offset; + nextfh = ntohl(ri.next); + if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) + ino = ntohl(ri.spec); + if (filldir(dirent, fsname, j, offset, ino) < 0) { + return stored; + } + stored++; + offset = nextfh & ROMFH_MASK; + } +} + +static int +romfs_lookup(struct inode *dir, const char *name, int len, struct inode **result) +{ + unsigned long offset, maxoff; + int fslen, res; + char fsname[ROMFS_MAXFN]; /* XXX dynamic? 
*/ + struct romfs_inode ri; + + *result = NULL; + if (!dir || !S_ISDIR(dir->i_mode)) { + res = -EBADF; + goto out; + } + + offset = dir->i_ino & ROMFH_MASK; + if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) { + res = -ENOENT; + goto out; + } + + maxoff = dir->i_sb->u.romfs_sb.s_maxsize; + offset = ntohl(ri.spec) & ROMFH_MASK; + + for(;;) { + if (!offset || offset >= maxoff + || romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) { + res = -ENOENT; + goto out; + } + + /* try to match the first 16 bytes of name */ + fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, ROMFH_SIZE); + if (len < ROMFH_SIZE) { + if (len == fslen) { + /* both are shorter, and same size */ + romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1); + if (strncmp (name, fsname, len) == 0) + break; + } + } else if (fslen >= ROMFH_SIZE) { + /* both are longer; XXX optimize max size */ + fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, sizeof(fsname)-1); + if (len == fslen) { + romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1); + if (strncmp(name, fsname, len) == 0) + break; + } + } + /* next entry */ + offset = ntohl(ri.next) & ROMFH_MASK; + } + + /* Hard link handling */ + if ((ntohl(ri.next) & ROMFH_TYPE) == ROMFH_HRD) + offset = ntohl(ri.spec) & ROMFH_MASK; + + res = 0; + if (!(*result = iget(dir->i_sb, offset))) + res = -EACCES; + +out: + iput(dir); + return res; +} + +/* + * Ok, we do readpage, to be able to execute programs. Unfortunately, + * bmap is not applicable, since we have looser alignments. + * + * XXX I'm not quite sure that I need to muck around the PG_xx bits.. 
+ */ + +static int +romfs_readpage(struct inode * inode, struct page * page) +{ + unsigned long buf; + unsigned long offset, avail, readlen; + int result = -EIO; + + buf = page_address(page); + atomic_inc(&page->count); + offset = page->offset; + if (offset < inode->i_size) { + avail = inode->i_size-offset; + readlen = min(avail, PAGE_SIZE); + if (romfs_copyfrom(inode, (void *)buf, inode->u.romfs_i.i_dataoffset+offset, readlen) == readlen) { + if (readlen < PAGE_SIZE) { + memset((void *)(buf+readlen),0,PAGE_SIZE-readlen); + } + result = 0; + set_bit(PG_uptodate, &page->flags); + } else { + memset((void *)buf, 0, PAGE_SIZE); + } + } + free_page(buf); + return result; +} + +static int +romfs_readlink(struct inode *inode, char *buffer, int len) +{ + int mylen; + char buf[ROMFS_MAXFN]; /* XXX dynamic */ + + if (!inode || !S_ISLNK(inode->i_mode)) { + mylen = -EBADF; + goto out; + } + + mylen = min(sizeof(buf), inode->i_size); + + if (romfs_copyfrom(inode, buf, inode->u.romfs_i.i_dataoffset, mylen) <= 0) { + mylen = -EIO; + goto out; + } + copy_to_user(buffer, buf, mylen); + +out: + iput(inode); + return mylen; +} + +static int +romfs_follow_link(struct inode *dir, struct inode *inode, + int flag, int mode, struct inode **res_inode) +{ + int error, len; + char *buf; + + *res_inode = NULL; + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + + if (!inode) { + iput(dir); + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + *res_inode = inode; + iput(dir); + return 0; + } + if (current->link_count > 5) { + iput(inode); + iput(dir); + return -ELOOP; + } + + /* Eek. Short enough. */ + len = inode->i_size; + if (!(buf = kmalloc(len+1, GFP_KERNEL))) { + iput(inode); + iput(dir); + /* correct? spin? 
*/ + return -EAGAIN; + } + error = romfs_copyfrom(inode, buf, inode->u.romfs_i.i_dataoffset, len); + if (error != len) { + iput(inode); + iput(dir); + error = -EIO; + } else { + iput(inode); + buf[len] = 0; + current->link_count++; + error = open_namei(buf, flag, mode, res_inode, dir); + current->link_count--; + } + + kfree(buf); + return error; +} + +/* Mapping from our types to the kernel */ + +static struct file_operations romfs_file_operations = { + NULL, /* lseek - default */ + generic_file_read, /* read */ + NULL, /* write - bad */ + NULL, /* readdir */ + NULL, /* poll - default */ + NULL, /* ioctl */ + generic_file_mmap, /* mmap */ + NULL, /* open */ + NULL, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ +}; + +static struct inode_operations romfs_file_inode_operations = { + &romfs_file_operations, + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + romfs_readpage, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap -- not really */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ +}; + +static struct file_operations romfs_dir_operations = { + NULL, /* lseek - default */ + NULL, /* read */ + NULL, /* write - bad */ + romfs_readdir, /* readdir */ + NULL, /* poll - default */ + NULL, /* ioctl */ + NULL, /* mmap */ + NULL, /* open */ + NULL, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ +}; + +/* Merged dir/symlink op table. readdir/lookup/readlink/follow_link + * will protect from type mismatch. 
+ */ + +static struct inode_operations romfs_dirlink_inode_operations = { + &romfs_dir_operations, + NULL, /* create */ + romfs_lookup, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + romfs_readlink, /* readlink */ + romfs_follow_link, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ +}; + +static mode_t romfs_modemap[] = +{ + 0, S_IFDIR, S_IFREG, S_IFLNK+0777, + S_IFBLK, S_IFCHR, S_IFSOCK, S_IFIFO +}; + +static struct inode_operations *romfs_inoops[] = +{ + NULL, /* hardlink, handled elsewhere */ + &romfs_dirlink_inode_operations, + &romfs_file_inode_operations, + &romfs_dirlink_inode_operations, + &blkdev_inode_operations, /* standard handlers */ + &chrdev_inode_operations, + NULL, /* socket */ + NULL, /* fifo */ +}; + +static void +romfs_read_inode(struct inode *i) +{ + int nextfh, ino; + struct romfs_inode ri; + + i->i_op = NULL; + + ino = i->i_ino & ROMFH_MASK; + + /* Loop for finding the real hard link */ + for(;;) { + if (romfs_copyfrom(i, &ri, ino, ROMFH_SIZE) <= 0) { + printk("romfs: read error for inode 0x%x\n", ino); + return; + } + nextfh = ntohl(ri.next); + if ((nextfh & ROMFH_TYPE) != ROMFH_HRD) + break; + + ino = ntohl(ri.spec) & ROMFH_MASK; + } + + i->i_nlink = 1; /* Hard to decide.. 
*/ + i->i_size = ntohl(ri.size); + i->i_mtime = i->i_atime = i->i_ctime = 0; + i->i_uid = i->i_gid = 0; + + i->i_op = romfs_inoops[nextfh & ROMFH_TYPE]; + + /* Precalculate the data offset */ + ino = romfs_strnlen(i, ino+ROMFH_SIZE, ROMFS_MAXFN); + if (ino >= 0) + ino = ((ROMFH_SIZE+ino+1+ROMFH_PAD)&ROMFH_MASK); + else + ino = 0; + i->u.romfs_i.i_metasize = ino; + i->u.romfs_i.i_dataoffset = ino+(i->i_ino&ROMFH_MASK); + + /* Compute permissions */ + ino = S_IRUGO|S_IWUSR; + ino |= romfs_modemap[nextfh & ROMFH_TYPE]; + if (nextfh & ROMFH_EXEC) { + ino |= S_IXUGO; + } + i->i_mode = ino; + + if (S_ISFIFO(ino)) + init_fifo(i); + else if (S_ISDIR(ino)) + i->i_size = i->u.romfs_i.i_metasize; + else if (S_ISBLK(ino) || S_ISCHR(ino)) { + i->i_mode &= ~(S_IRWXG|S_IRWXO); + ino = ntohl(ri.spec); + i->i_rdev = MKDEV(ino>>16,ino&0xffff); + } +} + +static struct super_operations romfs_ops = { + romfs_read_inode, /* read inode */ + NULL, /* notify change */ + NULL, /* write inode */ + NULL, /* put inode */ + romfs_put_super, /* put super */ + NULL, /* write super */ + romfs_statfs, /* statfs */ + NULL /* remount */ +}; + +static struct file_system_type romfs_fs_type = { + romfs_read_super, "romfs", 1, NULL +}; + +__initfunc(int init_romfs_fs(void)) +{ + return register_filesystem(&romfs_fs_type); +} + +#ifdef MODULE + +/* Yes, works even as a module... :) */ + +EXPORT_NO_SYMBOLS; + +int +init_module(void) +{ + return init_romfs_fs(); +} + +void +cleanup_module(void) +{ + unregister_filesystem(&romfs_fs_type); +} +#endif diff --git a/fs/select.c b/fs/select.c index 5ffb84f86..683865a30 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1,5 +1,5 @@ /* - * This file contains the procedures for the handling of select + * This file contains the procedures for the handling of select and poll * * Created for Linux based loosely upon Mathius Lattner's minix * patches by Peter MacDonald. Heavily edited by Linus. 
@@ -21,11 +21,16 @@ #include <linux/errno.h> #include <linux/personality.h> #include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> #include <asm/system.h> +#include <asm/poll.h> #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) +#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) /* * Ok, Peter made a complicated, but straightforward multiple_wait() function. @@ -34,20 +39,20 @@ * understand what I'm doing here, then you understand how the linux * sleep/wakeup mechanism works. * - * Two very simple procedures, select_wait() and free_wait() make all the work. - * select_wait() is an inline-function defined in <linux/sched.h>, as all select - * functions have to call it to add an entry to the select table. + * Two very simple procedures, poll_wait() and free_wait() make all the work. + * poll_wait() is an inline-function defined in <linux/sched.h>, as all select/poll + * functions have to call it to add an entry to the poll table. */ /* - * I rewrote this again to make the select_table size variable, take some + * I rewrote this again to make the poll_table size variable, take some * more shortcuts, improve responsiveness, and remove another race that * Linus noticed. -- jrs */ -static void free_wait(select_table * p) +static void free_wait(poll_table * p) { - struct select_table_entry * entry = p->entry + p->nr; + struct poll_table_entry * entry = p->entry + p->nr; while (p->nr > 0) { p->nr--; @@ -57,53 +62,31 @@ static void free_wait(select_table * p) } /* - * The check function checks the ready status of a file using the vfs layer. + * For the kernel fd_set we use a fixed set-size for allocation purposes. + * This set-size doesn't necessarily bear any relation to the size the user + * uses, but should preferably obviously be larger than any possible user + * size (NR_OPEN bits). * - * If the file was not ready we were added to its wait queue. 
But in - * case it became ready just after the check and just before it called - * select_wait, we call it again, knowing we are already on its - * wait queue this time. The second call is not necessary if the - * select_table is NULL indicating an earlier file check was ready - * and we aren't going to sleep on the select_table. -- jrs + * We need 6 bitmaps (in/out/ex for both incoming and outgoing), and we + * allocate one page for all the bitmaps. Thus we have 8*PAGE_SIZE bits, + * to be divided by 6. And we'd better make sure we round to a full + * long-word (in fact, we'll round to 64 bytes). */ - -static inline int __check( - int (*select) (struct inode *, struct file *, int, select_table *), - struct inode *inode, - struct file *file, - int flag, - select_table * wait) -{ - return select(inode, file, flag, wait) || - (wait && select(inode, file, flag, NULL)); -} - -#define check(flag,wait,file) \ -(((file)->f_op && (file)->f_op->select) ? \ - __check((file)->f_op->select,(file)->f_inode,file,flag,wait) \ - : \ - (flag != SEL_EX)) - -/* - * Due to kernel stack usage, we use a _limited_ fd_set type here, and once - * we really start supporting >256 file descriptors we'll probably have to - * allocate the kernel fd_set copies dynamically.. (The kernel select routines - * are careful to touch only the defined low bits of any fd_set pointer, this - * is important for performance too). - */ -typedef unsigned long limited_fd_set[NR_OPEN/(8*(sizeof(unsigned long)))]; +#define KFDS_64BLOCK ((PAGE_SIZE/(6*64))*64) +#define KFDS_NR (KFDS_64BLOCK*8 > NR_OPEN ? 
NR_OPEN : KFDS_64BLOCK*8) +typedef unsigned long kernel_fd_set[KFDS_NR/(8*sizeof(unsigned long))]; typedef struct { - limited_fd_set in, out, ex; - limited_fd_set res_in, res_out, res_ex; + kernel_fd_set in, out, ex; + kernel_fd_set res_in, res_out, res_ex; } fd_set_buffer; #define __IN(in) (in) -#define __OUT(in) (in + sizeof(limited_fd_set)/sizeof(unsigned long)) -#define __EX(in) (in + 2*sizeof(limited_fd_set)/sizeof(unsigned long)) -#define __RES_IN(in) (in + 3*sizeof(limited_fd_set)/sizeof(unsigned long)) -#define __RES_OUT(in) (in + 4*sizeof(limited_fd_set)/sizeof(unsigned long)) -#define __RES_EX(in) (in + 5*sizeof(limited_fd_set)/sizeof(unsigned long)) +#define __OUT(in) (in + sizeof(kernel_fd_set)/sizeof(unsigned long)) +#define __EX(in) (in + 2*sizeof(kernel_fd_set)/sizeof(unsigned long)) +#define __RES_IN(in) (in + 3*sizeof(kernel_fd_set)/sizeof(unsigned long)) +#define __RES_OUT(in) (in + 4*sizeof(kernel_fd_set)/sizeof(unsigned long)) +#define __RES_EX(in) (in + 5*sizeof(kernel_fd_set)/sizeof(unsigned long)) #define BITS(in) (*__IN(in)|*__OUT(in)|*__EX(in)) @@ -154,11 +137,15 @@ get_max: #define ISSET(i,m) (((i)&*(m)) != 0) #define SET(i,m) (*(m) |= (i)) +#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) +#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) +#define POLLEX_SET (POLLPRI) + static int do_select(int n, fd_set_buffer *fds) { int retval; - select_table wait_table, *wait; - struct select_table_entry *entry; + poll_table wait_table, *wait; + struct poll_table_entry *entry; int i; retval = max_select_fd(n, fds); @@ -166,7 +153,8 @@ static int do_select(int n, fd_set_buffer *fds) goto out; n = retval; retval = -ENOMEM; - if(!(entry = (struct select_table_entry*) __get_free_page(GFP_KERNEL))) + entry = (struct poll_table_entry *) __get_free_page(GFP_KERNEL); + if (!entry) goto out; retval = 0; wait_table.nr = 0; @@ -178,20 +166,30 @@ static int do_select(int n, fd_set_buffer *fds) for (i = 0 ; i < n ; 
i++,fd++) { unsigned long bit = BIT(i); unsigned long *in = MEM(i,fds->in); - if (ISSET(bit,__IN(in)) && check(SEL_IN,wait,*fd)) { - SET(bit, __RES_IN(in)); - retval++; - wait = NULL; - } - if (ISSET(bit,__OUT(in)) && check(SEL_OUT,wait,*fd)) { - SET(bit, __RES_OUT(in)); - retval++; - wait = NULL; - } - if (ISSET(bit,__EX(in)) && check(SEL_EX,wait,*fd)) { - SET(bit, __RES_EX(in)); - retval++; - wait = NULL; + + if (bit & BITS(in)) { + struct file * file = *fd; + unsigned int mask = POLLNVAL; + if (file) { + mask = DEFAULT_POLLMASK; + if (file->f_op && file->f_op->poll) + mask = file->f_op->poll(file, wait); + } + if ((mask & POLLIN_SET) && ISSET(bit, __IN(in))) { + SET(bit, __RES_IN(in)); + retval++; + wait = NULL; + } + if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(in))) { + SET(bit, __RES_OUT(in)); + retval++; + wait = NULL; + } + if ((mask & POLLEX_SET) && ISSET(bit, __EX(in))) { + SET(bit, __RES_EX(in)); + retval++; + wait = NULL; + } } } wait = NULL; @@ -284,18 +282,21 @@ __zero_fd_set((nr)-1, (unsigned long *) (fdp)) */ asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) { - int error; - fd_set_buffer fds; + int error = -EINVAL; + fd_set_buffer *fds; unsigned long timeout; - error = -EINVAL; + lock_kernel(); + fds = (fd_set_buffer *) __get_free_page(GFP_KERNEL); + if (!fds) + goto out; if (n < 0) goto out; - if (n > NR_OPEN) - n = NR_OPEN; - if ((error = get_fd_set(n, inp, &fds.in)) || - (error = get_fd_set(n, outp, &fds.out)) || - (error = get_fd_set(n, exp, &fds.ex))) goto out; + if (n > KFDS_NR) + n = KFDS_NR; + if ((error = get_fd_set(n, inp, &fds->in)) || + (error = get_fd_set(n, outp, &fds->out)) || + (error = get_fd_set(n, exp, &fds->ex))) goto out; timeout = ~0UL; if (tvp) { error = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp)); @@ -311,11 +312,11 @@ asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct if (timeout) timeout += jiffies + 1; } - zero_fd_set(n, &fds.res_in); - 
zero_fd_set(n, &fds.res_out); - zero_fd_set(n, &fds.res_ex); + zero_fd_set(n, &fds->res_in); + zero_fd_set(n, &fds->res_out); + zero_fd_set(n, &fds->res_ex); current->timeout = timeout; - error = do_select(n, &fds); + error = do_select(n, fds); timeout = current->timeout - jiffies - 1; current->timeout = 0; if ((long) timeout < 0) @@ -334,9 +335,107 @@ asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct goto out; error = 0; } - set_fd_set(n, inp, &fds.res_in); - set_fd_set(n, outp, &fds.res_out); - set_fd_set(n, exp, &fds.res_ex); + set_fd_set(n, inp, &fds->res_in); + set_fd_set(n, outp, &fds->res_out); + set_fd_set(n, exp, &fds->res_ex); out: + free_page((unsigned long) fds); + unlock_kernel(); return error; } + +static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait) +{ + int count; + struct file ** fd = current->files->fd; + + count = 0; + for (;;) { + unsigned int j; + struct pollfd * fdpnt; + + current->state = TASK_INTERRUPTIBLE; + for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) { + unsigned int i; + unsigned int mask; + struct file * file; + + mask = POLLNVAL; + i = fdpnt->fd; + if (i < NR_OPEN && (file = fd[i]) != NULL) { + mask = DEFAULT_POLLMASK; + if (file->f_op && file->f_op->poll) + mask = file->f_op->poll(file, wait); + mask &= fdpnt->events | POLLERR | POLLHUP; + } + if (mask) { + wait = NULL; + count++; + } + fdpnt->revents = mask; + } + + wait = NULL; + if (count || !current->timeout || (current->signal & ~current->blocked)) + break; + schedule(); + } + current->state = TASK_RUNNING; + return count; +} + +asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, int timeout) +{ + int i, count, fdcount, err; + struct pollfd * fds, *fds1; + poll_table wait_table; + struct poll_table_entry *entry; + + lock_kernel(); + err = -ENOMEM; + entry = (struct poll_table_entry *) __get_free_page(GFP_KERNEL); + if (!entry) + goto out; + fds = (struct pollfd *) kmalloc(nfds*sizeof(struct pollfd), 
GFP_KERNEL); + if (!fds) { + free_page((unsigned long) entry); + goto out; + } + + err = -EFAULT; + if (copy_from_user(fds, ufds, nfds*sizeof(struct pollfd))) { + free_page((unsigned long)entry); + kfree(fds); + goto out; + } + + if (timeout < 0) + timeout = 0x7fffffff; + else if (timeout) + timeout = ((unsigned long)timeout*HZ+999)/1000+jiffies+1; + current->timeout = timeout; + + count = 0; + wait_table.nr = 0; + wait_table.entry = entry; + + fdcount = do_poll(nfds, fds, &wait_table); + current->timeout = 0; + + free_wait(&wait_table); + free_page((unsigned long) entry); + + /* OK, now copy the revents fields back to user space. */ + fds1 = fds; + for(i=0; i < (int)nfds; i++, ufds++, fds++) { + __put_user(fds->revents, &ufds->revents); + } + kfree(fds1); + if (!fdcount && (current->signal & ~current->blocked)) + err = -EINTR; + else + err = fdcount; +out: + unlock_kernel(); + return err; +} diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index a07d13517..e23dbb979 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -56,7 +56,7 @@ static struct file_operations smb_dir_operations = smb_dir_read, /* read - bad */ NULL, /* write - bad */ smb_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ smb_ioctl, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 525c78ab9..93c57e38f 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -215,7 +215,7 @@ static struct file_operations smb_file_operations = smb_file_read, /* read */ smb_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ smb_ioctl, /* ioctl */ smb_mmap, /* mmap */ NULL, /* open */ diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index b39403358..7ab7d15ff 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -5,6 +5,7 @@ * */ +#include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> @@ -18,6 +19,7 @@ #include <linux/locks.h> #include 
<linux/fcntl.h> #include <linux/malloc.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -431,18 +433,17 @@ static struct file_system_type smb_fs_type = smb_read_super, "smbfs", 0, NULL }; -int -init_smb_fs(void) +__initfunc(int init_smb_fs(void)) { return register_filesystem(&smb_fs_type); } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - DPRINTK("smbfs: init_module called\n"); #ifdef DEBUG_SMB_MALLOC @@ -453,9 +454,7 @@ init_module(void) smb_init_dir_cache(); - if ((status = init_smb_fs()) == 0) - register_symtab(0); - return status; + return init_smb_fs(); } void diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index ee998a89f..1fc0cd171 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -128,7 +128,10 @@ smb_encode_path(struct smb_server *server, const char *name, const int len) { byte *start = p; - p = smb_encode_parents(p, dir); + if (dir != NULL) + { + p = smb_encode_parents(p, dir); + } p = smb_encode_this_name(p, name, len); *p++ = 0; if (server->protocol <= PROTOCOL_COREPLUS) @@ -573,7 +576,6 @@ smb_proc_open(struct smb_server *server, DPRINTK("smb_proc_open: name=%s\n", name); smb_lock_server(server); - buf = server->packet; if (entry->opened != 0) { @@ -582,6 +584,7 @@ smb_proc_open(struct smb_server *server, return 0; } retry: + buf = server->packet; p = smb_setup_header(server, SMBopen, 2, 0); WSET(buf, smb_vwv0, 0x42); /* read/write */ WSET(buf, smb_vwv1, o_attr); @@ -732,8 +735,8 @@ smb_proc_create(struct inode *dir, const char *name, int len, __u16 fileid; smb_lock_server(server); - buf = server->packet; retry: + buf = server->packet; p = smb_setup_header(server, SMBcreate, 3, 0); WSET(buf, smb_vwv0, attr); DSET(buf, smb_vwv1, utc2local(ctime)); @@ -764,15 +767,13 @@ smb_proc_mv(struct inode *odir, const char *oname, const int olen, { char *p; struct smb_server *server = SMB_SERVER(odir); - char *buf; int result; smb_lock_server(server); - buf = server->packet; retry: p = 
smb_setup_header(server, SMBmv, 1, 0); - WSET(buf, smb_vwv0, aSYSTEM | aHIDDEN); + WSET(server->packet, smb_vwv0, aSYSTEM | aHIDDEN); *p++ = 4; p = smb_encode_path(server, p, SMB_INOP(odir), oname, olen); *p++ = 4; @@ -825,7 +826,6 @@ smb_proc_rmdir(struct inode *dir, const char *name, const int len) smb_lock_server(server); - retry: p = smb_setup_header(server, SMBrmdir, 0, 0); *p++ = 4; @@ -848,15 +848,13 @@ smb_proc_unlink(struct inode *dir, const char *name, const int len) { char *p; struct smb_server *server = SMB_SERVER(dir); - char *buf; int result; smb_lock_server(server); - buf = server->packet; retry: p = smb_setup_header(server, SMBunlink, 1, 0); - WSET(buf, smb_vwv0, aSYSTEM | aHIDDEN); + WSET(server->packet, smb_vwv0, aSYSTEM | aHIDDEN); *p++ = 4; p = smb_encode_path(server, p, SMB_INOP(dir), name, len); smb_setup_bcc(server, p); @@ -880,9 +878,9 @@ smb_proc_trunc(struct smb_server *server, word fid, dword length) int result; smb_lock_server(server); - buf = server->packet; retry: + buf = server->packet; p = smb_setup_header(server, SMBwrite, 5, 0); WSET(buf, smb_vwv0, fid); WSET(buf, smb_vwv1, 0); @@ -1011,9 +1009,9 @@ smb_proc_readdir_short(struct smb_server *server, struct inode *dir, int fpos, DPRINTK("SMB call readdir %d @ %d\n", cache_size, fpos); smb_lock_server(server); - buf = server->packet; retry: + buf = server->packet; first = 1; total_count = 0; current_entry = entry; @@ -1389,11 +1387,11 @@ smb_proc_getattr_core(struct inode *dir, const char *name, int len, char *buf; smb_lock_server(server); - buf = server->packet; DDPRINTK("smb_proc_getattr: %s\n", name); retry: + buf = server->packet; p = smb_setup_header(server, SMBgetatr, 0, 0); *p++ = 4; p = smb_encode_path(server, p, SMB_INOP(dir), name, len); @@ -1512,9 +1510,9 @@ smb_proc_setattr_core(struct smb_server *server, int result; smb_lock_server(server); - buf = server->packet; retry: + buf = server->packet; p = smb_setup_header(server, SMBsetatr, 8, 0); WSET(buf, smb_vwv0, 
new_finfo->attr); DSET(buf, smb_vwv1, utc2local(new_finfo->f_mtime)); diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index ca6d8c269..4d85b8e66 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c @@ -15,6 +15,7 @@ #include <linux/net.h> #include <linux/mm.h> #include <linux/netdevice.h> +#include <net/scm.h> #include <net/ip.h> #include <linux/smb.h> @@ -26,42 +27,53 @@ static int _recvfrom(struct socket *sock, unsigned char *ubuf, int size, - int noblock, unsigned flags, struct sockaddr_in *sa, int *addr_len) + unsigned flags) { struct iovec iov; struct msghdr msg; + struct scm_cookie scm; - iov.iov_base = ubuf; - iov.iov_len = size; - - msg.msg_name = (void *) sa; + msg.msg_name = NULL; msg.msg_namelen = 0; - if (addr_len) - msg.msg_namelen = *addr_len; - msg.msg_control = NULL; msg.msg_iov = &iov; msg.msg_iovlen = 1; - - return sock->ops->recvmsg(sock, &msg, size, noblock, flags, addr_len); + msg.msg_control = NULL; + iov.iov_base = ubuf; + iov.iov_len = size; + + memset(&scm, 0,sizeof(scm)); + size=sock->ops->recvmsg(sock, &msg, size, flags, &scm); + if(size>=0) + scm_recv(sock,&msg,&scm,flags); + return size; } static int -_send(struct socket *sock, const void *buff, int len, - int nonblock, unsigned flags) +_send(struct socket *sock, const void *buff, int len) { struct iovec iov; struct msghdr msg; - - iov.iov_base = (void *) buff; - iov.iov_len = len; + struct scm_cookie scm; + int err; msg.msg_name = NULL; msg.msg_namelen = 0; - msg.msg_control = NULL; msg.msg_iov = &iov; msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + + iov.iov_base = (void *)buff; + iov.iov_len = len; + + msg.msg_flags = 0; - return sock->ops->sendmsg(sock, &msg, len, nonblock, flags); + err = scm_send(sock, &msg, &scm); + if (err < 0) + return err; + err = sock->ops->sendmsg(sock, &msg, len, &scm); + scm_destroy(&scm); + return err; } static void @@ -78,14 +90,14 @@ smb_data_callback(struct sock *sk, int len) fs = get_fs(); set_fs(get_ds()); - result = 
_recvfrom(sock, (void *) peek_buf, 1, 1, - MSG_PEEK, NULL, NULL); + result = _recvfrom(sock, (void *) peek_buf, 1, + MSG_PEEK | MSG_DONTWAIT); while ((result != -EAGAIN) && (peek_buf[0] == 0x85)) { /* got SESSION KEEP ALIVE */ - result = _recvfrom(sock, (void *) peek_buf, - 4, 1, 0, NULL, NULL); + result = _recvfrom(sock, (void *) peek_buf, 4, + MSG_DONTWAIT); DDPRINTK("smb_data_callback:" " got SESSION KEEP ALIVE\n"); @@ -94,9 +106,8 @@ smb_data_callback(struct sock *sk, int len) { break; } - result = _recvfrom(sock, (void *) peek_buf, - 1, 1, MSG_PEEK, - NULL, NULL); + result = _recvfrom(sock, (void *) peek_buf, 1, + MSG_PEEK | MSG_DONTWAIT); } set_fs(fs); @@ -132,7 +143,7 @@ smb_catch_keepalive(struct smb_server *server) server->data_ready = NULL; return -EINVAL; } - sk = (struct sock *) (sock->data); + sk = sock->sk; if (sk == NULL) { @@ -178,7 +189,7 @@ smb_dont_catch_keepalive(struct smb_server *server) printk("smb_dont_catch_keepalive: did not get SOCK_STREAM\n"); return -EINVAL; } - sk = (struct sock *) (sock->data); + sk = sock->sk; if (sk == NULL) { @@ -216,8 +227,12 @@ smb_send_raw(struct socket *sock, unsigned char *source, int length) { result = _send(sock, (void *) (source + already_sent), - length - already_sent, 0, 0); + length - already_sent); + if (result == 0) + { + return -EIO; + } if (result < 0) { DPRINTK("smb_send_raw: sendto error = %d\n", @@ -239,9 +254,12 @@ smb_receive_raw(struct socket *sock, unsigned char *target, int length) { result = _recvfrom(sock, (void *) (target + already_read), - length - already_read, 0, 0, - NULL, NULL); + length - already_read, 0); + if (result == 0) + { + return -EIO; + } if (result < 0) { DPRINTK("smb_receive_raw: recvfrom error = %d\n", @@ -369,7 +387,6 @@ smb_receive_trans2(struct smb_server *server, int total_data = 0; int total_param = 0; int result; - unsigned char *inbuf = server->packet; unsigned char *rcv_buf; int buf_len; int data_len = 0; @@ -385,8 +402,8 @@ smb_receive_trans2(struct smb_server 
*server, *ldata = *lparam = 0; return 0; } - total_data = WVAL(inbuf, smb_tdrcnt); - total_param = WVAL(inbuf, smb_tprcnt); + total_data = WVAL(server->packet, smb_tdrcnt); + total_param = WVAL(server->packet, smb_tprcnt); DDPRINTK("smb_receive_trans2: td=%d,tp=%d\n", total_data, total_param); @@ -411,6 +428,8 @@ smb_receive_trans2(struct smb_server *server, while (1) { + unsigned char *inbuf = server->packet; + if (WVAL(inbuf, smb_prdisp) + WVAL(inbuf, smb_prcnt) > total_param) { @@ -480,6 +499,8 @@ smb_receive_trans2(struct smb_server *server, return result; } +extern struct net_proto_family inet_family_ops; + int smb_release(struct smb_server *server) { @@ -498,8 +519,8 @@ smb_release(struct smb_server *server) is nothing behind it, so I set it to SS_UNCONNECTED. */ sock->state = SS_UNCONNECTED; - result = sock->ops->create(sock, 0); - DPRINTK("smb_release: sock->ops->create = %d\n", result); + result = inet_family_ops.create(sock, 0); + DPRINTK("smb_release: inet_create = %d\n", result); return result; } @@ -588,6 +609,8 @@ smb_send_trans2(struct smb_server *server, __u16 trans2_command, int lparam, unsigned char *param) { struct socket *sock = server_sock(server); + struct scm_cookie scm; + int err; /* I know the following is very ugly, but I want to build the smb packet as efficiently as possible. */ @@ -632,6 +655,15 @@ smb_send_trans2(struct smb_server *server, __u16 trans2_command, *p++ = 'D'; /* this was added because OS/2 does it */ *p++ = ' '; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 4; + msg.msg_flags = 0; + iov[0].iov_base = (void *) server->packet; iov[0].iov_len = oparam; iov[1].iov_base = (param == NULL) ? padding : param; @@ -641,13 +673,13 @@ smb_send_trans2(struct smb_server *server, __u16 trans2_command, iov[3].iov_base = (data == NULL) ? 
padding : data; iov[3].iov_len = ldata; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_iov = iov; - msg.msg_iovlen = 4; - - return sock->ops->sendmsg(sock, &msg, packet_length, 0, 0); + err = scm_send(sock, &msg, &scm); + if (err < 0) + return err; + + err = sock->ops->sendmsg(sock, &msg, packet_length, &scm); + scm_destroy(&scm); + return err; } /* @@ -11,16 +11,30 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> -#ifndef __alpha__ +/* + * Revalidate the inode. This is required for proper NFS attribute caching. + */ +static __inline__ int +do_revalidate(struct inode *inode) +{ + if (inode->i_op && inode->i_op->revalidate) + return inode->i_op->revalidate(inode); + return 0; +} + + +#if !defined(__alpha__) && !defined(__sparc__) /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ -static void cp_old_stat(struct inode * inode, struct __old_kernel_stat * statbuf) +static int cp_old_stat(struct inode * inode, struct __old_kernel_stat * statbuf) { struct __old_kernel_stat tmp; @@ -39,12 +53,12 @@ static void cp_old_stat(struct inode * inode, struct __old_kernel_stat * statbuf tmp.st_atime = inode->i_atime; tmp.st_mtime = inode->i_mtime; tmp.st_ctime = inode->i_ctime; - copy_to_user(statbuf,&tmp,sizeof(tmp)); + return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } #endif -static void cp_new_stat(struct inode * inode, struct stat * statbuf) +static int cp_new_stat(struct inode * inode, struct stat * statbuf) { struct stat tmp; unsigned int blocks, indirect; @@ -99,10 +113,10 @@ static void cp_new_stat(struct inode * inode, struct stat * statbuf) tmp.st_blocks = inode->i_blocks; tmp.st_blksize = inode->i_blksize; } - copy_to_user(statbuf,&tmp,sizeof(tmp)); + return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; } -#ifndef __alpha__ +#if !defined(__alpha__) && !defined(__sparc__) /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? @@ -112,15 +126,16 @@ asmlinkage int sys_stat(char * filename, struct __old_kernel_stat * statbuf) struct inode * inode; int error; - error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); - if (error) - return error; + lock_kernel(); error = namei(filename,&inode); if (error) - return error; - cp_old_stat(inode,statbuf); + goto out; + if ((error = do_revalidate(inode)) == 0) + error = cp_old_stat(inode,statbuf); iput(inode); - return 0; +out: + unlock_kernel(); + return error; } #endif @@ -129,18 +144,19 @@ asmlinkage int sys_newstat(char * filename, struct stat * statbuf) struct inode * inode; int error; - error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); - if (error) - return error; + lock_kernel(); error = namei(filename,&inode); if (error) - return error; - cp_new_stat(inode,statbuf); + goto out; + if ((error = do_revalidate(inode)) == 0) + error = cp_new_stat(inode,statbuf); iput(inode); - return 0; +out: + unlock_kernel(); + return error; } -#ifndef __alpha__ +#if !defined(__alpha__) && !defined(__sparc__) /* * For backward compatibility? 
Maybe this should be moved @@ -151,15 +167,16 @@ asmlinkage int sys_lstat(char * filename, struct __old_kernel_stat * statbuf) struct inode * inode; int error; - error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); - if (error) - return error; + lock_kernel(); error = lnamei(filename,&inode); if (error) - return error; - cp_old_stat(inode,statbuf); + goto out; + if ((error = do_revalidate(inode)) == 0) + error = cp_old_stat(inode,statbuf); iput(inode); - return 0; +out: + unlock_kernel(); + return error; } #endif @@ -169,18 +186,19 @@ asmlinkage int sys_newlstat(char * filename, struct stat * statbuf) struct inode * inode; int error; - error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); - if (error) - return error; + lock_kernel(); error = lnamei(filename,&inode); if (error) - return error; - cp_new_stat(inode,statbuf); + goto out; + if ((error = do_revalidate(inode)) == 0) + error = cp_new_stat(inode,statbuf); iput(inode); - return 0; +out: + unlock_kernel(); + return error; } -#ifndef __alpha__ +#if !defined(__alpha__) && !defined(__sparc__) /* * For backward compatibility? 
Maybe this should be moved @@ -190,15 +208,16 @@ asmlinkage int sys_fstat(unsigned int fd, struct __old_kernel_stat * statbuf) { struct file * f; struct inode * inode; - int error; + int ret = -EBADF; - error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); - if (error) - return error; + lock_kernel(); if (fd >= NR_OPEN || !(f=current->files->fd[fd]) || !(inode=f->f_inode)) - return -EBADF; - cp_old_stat(inode,statbuf); - return 0; + goto out; + if ((ret = do_revalidate(inode)) == 0) + ret = cp_old_stat(inode,statbuf); +out: + unlock_kernel(); + return ret; } #endif @@ -207,33 +226,40 @@ asmlinkage int sys_newfstat(unsigned int fd, struct stat * statbuf) { struct file * f; struct inode * inode; - int error; + int err = -EBADF; - error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); - if (error) - return error; + lock_kernel(); if (fd >= NR_OPEN || !(f=current->files->fd[fd]) || !(inode=f->f_inode)) - return -EBADF; - cp_new_stat(inode,statbuf); - return 0; + goto out; + if ((err = do_revalidate(inode)) == 0) + err = cp_new_stat(inode,statbuf); +out: + unlock_kernel(); + return err; } asmlinkage int sys_readlink(const char * path, char * buf, int bufsiz) { struct inode * inode; - int error; + int error = -EINVAL; + lock_kernel(); if (bufsiz <= 0) - return -EINVAL; + goto out; error = verify_area(VERIFY_WRITE,buf,bufsiz); if (error) - return error; + goto out; error = lnamei(path,&inode); if (error) - return error; - if (!inode->i_op || !inode->i_op->readlink) { + goto out; + error = -EINVAL; + if (!inode->i_op || !inode->i_op->readlink + || (error = do_revalidate(inode)) < 0) { iput(inode); - return -EINVAL; + goto out; } - return inode->i_op->readlink(inode,buf,bufsiz); + error = inode->i_op->readlink(inode,buf,bufsiz); +out: + unlock_kernel(); + return error; } diff --git a/fs/super.c b/fs/super.c index 443c2d176..053c6321a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -30,6 +30,8 @@ #include <linux/string.h> #include <linux/locks.h> #include 
<linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <linux/fd.h> #include <asm/system.h> @@ -46,7 +48,6 @@ extern void wait_for_keypress(void); extern struct file_operations * get_blkfops(unsigned int major); -extern void blkdev_release (struct inode *); extern int root_mountflags; @@ -98,7 +99,7 @@ struct vfsmount *add_vfsmnt(kdev_t dev, const char *dev_name, const char *dir_na memset(lptr, 0, sizeof(struct vfsmount)); lptr->mnt_dev = dev; - lptr->mnt_sem.count = 1; + sema_init(&lptr->mnt_sem, 1); if (dev_name && !getname(dev_name, &tmp)) { if ((lptr->mnt_devname = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL)) != (char *)NULL) @@ -213,7 +214,7 @@ static int fs_index(const char * __name) static int fs_name(unsigned int index, char * buf) { struct file_system_type * tmp; - int err, len; + int len; tmp = file_systems; while (tmp && index > 0) { @@ -223,11 +224,7 @@ static int fs_name(unsigned int index, char * buf) if (!tmp) return -EINVAL; len = strlen(tmp->name) + 1; - err = verify_area(VERIFY_WRITE, buf, len); - if (err) - return err; - copy_to_user(buf, tmp->name, len); - return 0; + return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0; } static int fs_maxindex(void) @@ -250,6 +247,7 @@ asmlinkage int sys_sysfs(int option, ...) int retval = -EINVAL; unsigned int index; + lock_kernel(); va_start(args, option); switch (option) { case 1: @@ -266,6 +264,7 @@ asmlinkage int sys_sysfs(int option, ...) 
break; } va_end(args); + unlock_kernel(); return retval; } @@ -278,6 +277,7 @@ static struct proc_fs_info { { MS_NODEV, ",nodev" }, { MS_SYNCHRONOUS, ",sync" }, { MS_MANDLOCK, ",mand" }, + { MS_NOATIME, ",noatime" }, #ifdef MS_NOSUB /* Can't find this except in mount.c */ { MS_NOSUB, ",nosub" }, #endif @@ -325,6 +325,7 @@ int get_filesystem_info( char *buf ) len += sprintf(buf+len, ",wsize=%d", nfss->wsize); } +#if 0 if (nfss->timeo != 7*HZ/10) { len += sprintf(buf+len, ",timeo=%d", nfss->timeo*10/HZ); @@ -333,6 +334,7 @@ int get_filesystem_info( char *buf ) len += sprintf(buf+len, ",retrans=%d", nfss->retrans); } +#endif if (nfss->acregmin != 3*HZ) { len += sprintf(buf+len, ",acregmin=%d", nfss->acregmin/HZ); @@ -476,18 +478,15 @@ asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf) struct ustat tmp; struct statfs sbuf; unsigned long old_fs; - int error; + int err = -EINVAL; + lock_kernel(); s = get_super(to_kdev_t(dev)); if (s == NULL) - return -EINVAL; - + goto out; + err = -ENOSYS; if (!(s->s_op->statfs)) - return -ENOSYS; - - error = verify_area(VERIFY_WRITE,ubuf,sizeof(struct ustat)); - if (error) - return error; + goto out; old_fs = get_fs(); set_fs(get_ds()); @@ -498,8 +497,10 @@ asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf) tmp.f_tfree = sbuf.f_bfree; tmp.f_tinode = sbuf.f_ffree; - copy_to_user(ubuf,&tmp,sizeof(struct ustat)); - return 0; + err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? 
-EFAULT : 0; +out: + unlock_kernel(); + return err; } static struct super_block * read_super(kdev_t dev,const char *name,int flags, @@ -559,10 +560,9 @@ kdev_t get_unnamed_dev(void) void put_unnamed_dev(kdev_t dev) { - if (!dev) + if (!dev || MAJOR(dev) != UNNAMED_MAJOR) return; - if (MAJOR(dev) == UNNAMED_MAJOR && - clear_bit(MINOR(dev), unnamed_dev_in_use)) + if (clear_bit(MINOR(dev), unnamed_dev_in_use)) return; printk("VFS: put_unnamed_dev: freeing unused device %s\n", kdevname(dev)); @@ -635,27 +635,30 @@ asmlinkage int sys_umount(char * name) { struct inode * inode; kdev_t dev; - int retval; + int retval = -EPERM; struct inode dummy_inode; + lock_kernel(); if (!suser()) - return -EPERM; + goto out; retval = namei(name, &inode); if (retval) { retval = lnamei(name, &inode); if (retval) - return retval; + goto out; } if (S_ISBLK(inode->i_mode)) { dev = inode->i_rdev; + retval = -EACCES; if (IS_NODEV(inode)) { iput(inode); - return -EACCES; + goto out; } } else { + retval = -EINVAL; if (!inode->i_sb || inode != inode->i_sb->s_mounted) { iput(inode); - return -EINVAL; + goto out; } dev = inode->i_sb->s_dev; iput(inode); @@ -663,25 +666,26 @@ asmlinkage int sys_umount(char * name) dummy_inode.i_rdev = dev; inode = &dummy_inode; } + retval = -ENXIO; if (MAJOR(dev) >= MAX_BLKDEV) { iput(inode); - return -ENXIO; + goto out; } retval = do_umount(dev,0); if (!retval) { fsync_dev(dev); if (dev != ROOT_DEV) { blkdev_release (inode); - if (MAJOR(dev) == UNNAMED_MAJOR) - put_unnamed_dev(dev); + put_unnamed_dev(dev); } } if (inode != &dummy_inode) iput(inode); - if (retval) - return retval; - fsync_dev(dev); - return 0; + if (!retval) + fsync_dev(dev); +out: + unlock_kernel(); + return retval; } /* @@ -757,6 +761,7 @@ static int do_remount_sb(struct super_block *sb, int flags, char *data) if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) if (!fs_may_remount_ro(sb->s_dev)) return -EBUSY; + sb->s_flags = (flags & ~MS_RDONLY) | (sb->s_flags & MS_RDONLY); if (sb->s_op && 
sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); if (retval) @@ -808,7 +813,10 @@ static int copy_mount_options (const void * data, unsigned long *where) if (!(page = __get_free_page(GFP_KERNEL))) { return -ENOMEM; } - copy_from_user((void *) page,data,i); + if (copy_from_user((void *) page,data,i)) { + free_page(page); + return -EFAULT; + } *where = page; return 0; } @@ -822,9 +830,9 @@ static int copy_mount_options (const void * data, unsigned long *where) * information (or be NULL). * * NOTE! As old versions of mount() didn't use this setup, the flags - * has to have a special 16-bit magic number in the hight word: + * have to have a special 16-bit magic number in the high word: * 0xC0ED. If this magic word isn't present, the flags and data info - * isn't used, as the syscall assumes we are talking to an older + * aren't used, as the syscall assumes we are talking to an older * version that didn't understand them. */ asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, @@ -834,54 +842,60 @@ asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, struct inode * inode; struct file_operations * fops; kdev_t dev; - int retval; + int retval = -EPERM; const char * t; unsigned long flags = 0; unsigned long page = 0; + lock_kernel(); if (!suser()) - return -EPERM; + goto out; if ((new_flags & (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) { retval = copy_mount_options (data, &page); if (retval < 0) - return retval; + goto out; retval = do_remount(dir_name, new_flags & ~MS_MGC_MSK & ~MS_REMOUNT, (char *) page); free_page(page); - return retval; + goto out; } retval = copy_mount_options (type, &page); if (retval < 0) - return retval; + goto out; fstype = get_fs_type((char *) page); free_page(page); + retval = -ENODEV; if (!fstype) - return -ENODEV; + goto out; t = fstype->name; fops = NULL; if (fstype->requires_dev) { retval = namei(dev_name, &inode); if (retval) - return retval; + goto out; + retval = 
-ENOTBLK; if (!S_ISBLK(inode->i_mode)) { iput(inode); - return -ENOTBLK; + goto out; } + retval = -EACCES; if (IS_NODEV(inode)) { iput(inode); - return -EACCES; + goto out; } dev = inode->i_rdev; + retval = -ENXIO; if (MAJOR(dev) >= MAX_BLKDEV) { iput(inode); - return -ENXIO; + goto out; } fops = get_blkfops(MAJOR(dev)); + retval = -ENOTBLK; if (!fops) { iput(inode); - return -ENOTBLK; + goto out; } if (fops->open) { struct file dummy; /* allows read-write or read-only flag */ @@ -891,13 +905,14 @@ asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, retval = fops->open(inode, &dummy); if (retval) { iput(inode); - return retval; + goto out; } } } else { + retval = -EMFILE; if (!(dev = get_unnamed_dev())) - return -EMFILE; + goto out; inode = NULL; } page = 0; @@ -905,15 +920,20 @@ asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, flags = new_flags & ~MS_MGC_MSK; retval = copy_mount_options(data, &page); if (retval < 0) { + put_unnamed_dev(dev); iput(inode); - return retval; + goto out; } } retval = do_mount(dev,dev_name,dir_name,t,flags,(void *) page); free_page(page); - if (retval && fops && fops->release) + if (retval && fops && fops->release) { fops->release(inode, NULL); + put_unnamed_dev(dev); + } iput(inode); +out: + unlock_kernel(); return retval; } diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 52515b4f9..3dd0931cf 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -34,7 +34,7 @@ static struct file_operations sysv_dir_operations = { sysv_dir_read, /* read */ NULL, /* write - bad */ sysv_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 6029051d8..f3aadb509 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -44,7 +44,7 @@ static struct file_operations sysv_file_operations = { sysv_file_read, /* read */ sysv_file_write, /* write */ NULL, /* readdir - bad */ 
- NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ generic_file_mmap, /* mmap */ NULL, /* no special open is needed */ diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 4e14cb35e..ebbf0bb4f 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -20,6 +20,7 @@ * the superblock. */ +#include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> @@ -29,6 +30,7 @@ #include <linux/stat.h> #include <linux/string.h> #include <linux/locks.h> +#include <linux/init.h> #include <asm/uaccess.h> @@ -42,7 +44,7 @@ void sysv_put_inode(struct inode *inode) } -static struct super_operations sysv_sops = { +static struct super_operations sysv_sops = { sysv_read_inode, sysv_notify_change, sysv_write_inode, @@ -72,7 +74,7 @@ static void detected_bs512 (struct super_block *sb) sb->sv_inodes_per_block = 512/64; sb->sv_inodes_per_block_1 = 512/64-1; sb->sv_inodes_per_block_bits = 9-6; - sb->sv_toobig_block = 10 + + sb->sv_toobig_block = 10 + (sb->sv_ind_per_block = 512/4) + (sb->sv_ind_per_block_2 = (512/4)*(512/4)) + (sb->sv_ind_per_block_3 = (512/4)*(512/4)*(512/4)); @@ -99,7 +101,7 @@ static void detected_bs1024 (struct super_block *sb) sb->sv_inodes_per_block = 1024/64; sb->sv_inodes_per_block_1 = 1024/64-1; sb->sv_inodes_per_block_bits = 10-6; - sb->sv_toobig_block = 10 + + sb->sv_toobig_block = 10 + (sb->sv_ind_per_block = 1024/4) + (sb->sv_ind_per_block_2 = (1024/4)*(1024/4)) + (sb->sv_ind_per_block_3 = (1024/4)*(1024/4)*(1024/4)); @@ -336,7 +338,7 @@ static struct super_block * detected_coherent (struct super_block *sb, struct bu return sb; } -struct super_block *sysv_read_super(struct super_block *sb,void *data, +struct super_block *sysv_read_super(struct super_block *sb,void *data, int silent) { struct buffer_head *bh; @@ -669,7 +671,7 @@ repeat: return result; } -static struct buffer_head * block_getblk(struct inode * inode, +static struct buffer_head * block_getblk(struct inode * inode, struct buffer_head * bh, int nr, int 
create) { struct super_block *sb; @@ -978,26 +980,24 @@ static struct file_system_type sysv_fs_type[3] = { {sysv_read_super, "coherent", 1, NULL} }; -int init_sysv_fs(void) +__initfunc(int init_sysv_fs(void)) { int i; int ouch; for (i = 0; i < 3; i++) { if ((ouch = register_filesystem(&sysv_fs_type[i])) != 0) - return ouch; + break; } return ouch; } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - - if ((status = init_sysv_fs()) == 0) - register_symtab(0); - return status; + return init_sysv_fs(); } void cleanup_module(void) diff --git a/fs/ufs/ufs_dir.c b/fs/ufs/ufs_dir.c index 37595a9a0..26ae02abe 100644 --- a/fs/ufs/ufs_dir.c +++ b/fs/ufs/ufs_dir.c @@ -6,7 +6,7 @@ * Laboratory for Computer Science Research Computing Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_dir.c,v 1.7 1996/05/21 19:01:45 davem Exp $ + * $Id: ufs_dir.c,v 1.8 1997/01/26 07:14:28 davem Exp $ * */ @@ -143,7 +143,7 @@ static struct file_operations ufs_dir_operations = { NULL, /* read */ NULL, /* write */ ufs_readdir, /* readdir */ - NULL, /* select */ + NULL, /* poll */ NULL, /* ioctl */ NULL, /* mmap */ NULL, /* open */ diff --git a/fs/ufs/ufs_file.c b/fs/ufs/ufs_file.c index 0fb55d86b..4b479a65e 100644 --- a/fs/ufs/ufs_file.c +++ b/fs/ufs/ufs_file.c @@ -6,7 +6,7 @@ * Laboratory for Computer Science Research Computing Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_file.c,v 1.6 1996/05/19 03:55:48 krioles Exp $ + * $Id: ufs_file.c,v 1.7 1997/01/26 07:14:28 davem Exp $ * */ @@ -18,7 +18,7 @@ static struct file_operations ufs_file_operations = { generic_file_read, /* read */ NULL, /* write */ NULL, /* readdir */ - NULL, /* select */ + NULL, /* poll */ NULL, /* ioctl */ generic_file_mmap, /* mmap */ NULL, /* open */ diff --git a/fs/ufs/ufs_super.c b/fs/ufs/ufs_super.c index 11657c704..44d7241ef 100644 --- a/fs/ufs/ufs_super.c +++ b/fs/ufs/ufs_super.c @@ -6,7 +6,9 @@ * Laboratory for Computer Science Research Computing 
Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_super.c,v 1.17 1996/09/03 07:15:53 ecd Exp $ + * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) + * + * $Id: ufs_super.c,v 1.23 1997/04/16 04:53:39 tdyas Exp $ * */ @@ -18,12 +20,14 @@ * Gertjan van Wingerde <gertjan@cs.vu.nl> */ +#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/ufs_fs.h> #include <linux/locks.h> +#include <linux/init.h> #include <asm/uaccess.h> @@ -48,20 +52,17 @@ static struct file_system_type ufs_fs_type = { ufs_read_super, "ufs", 1, NULL }; -int -init_ufs_fs(void) +__initfunc(int init_ufs_fs(void)) { return(register_filesystem(&ufs_fs_type)); } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - - if ((status = init_ufs_fs()) == 0) - register_symtab(0); - return status; + return init_ufs_fs(); } void cleanup_module(void) @@ -136,7 +137,7 @@ ufs_read_super(struct super_block * sb, void * data, int silent) goto ufs_read_super_lose; } /* XXX - redo this so we can free it later... 
*/ - usb = (struct ufs_superblock *)__get_free_page(GFP_KERNEL); + usb = (struct ufs_superblock *)__get_free_page(GFP_KERNEL); if (usb == NULL) { brelse(bh1); brelse(bh2); @@ -158,7 +159,7 @@ ufs_read_super(struct super_block * sb, void * data, int silent) ufs_need_swab = 1; sb->s_magic = ufs_swab32(usb->fs_magic); if (sb->s_magic != UFS_MAGIC) { - printk ("ufs_read_super: bad magic number 0x%8.8x " + printk ("ufs_read_super: bad magic number 0x%8.8lx " "on dev %d/%d\n", sb->s_magic, MAJOR(sb->s_dev), MINOR(sb->s_dev)); @@ -291,23 +292,35 @@ void ufs_put_super (struct super_block * sb) void ufs_statfs(struct super_block * sb, struct statfs * buf, int bufsiz) { struct statfs tmp; + struct statfs *sp = &tmp; + struct ufs_superblock *fsb = sb->u.ufs_sb.s_raw_sb; + unsigned long used, avail; if (sb->u.ufs_sb.s_flags & UFS_DEBUG) { printk("ufs_statfs\n"); /* XXX */ } - tmp.f_type = sb->s_magic; - tmp.f_bsize = sb->s_blocksize; - tmp.f_blocks = ufs_swab32(sb->u.ufs_sb.s_raw_sb->fs_dsize); - tmp.f_bfree = ufs_swab32(sb->u.ufs_sb.s_raw_sb->fs_cstotal.cs_nbfree); - tmp.f_bavail = ufs_swab32(sb->u.ufs_sb.s_raw_sb->fs_cstotal.cs_nbfree); - tmp.f_files = sb->u.ufs_sb.s_ncg * sb->u.ufs_sb.s_ipg; - tmp.f_ffree = ufs_swab32(sb->u.ufs_sb.s_raw_sb->fs_cstotal.cs_nifree); - tmp.f_fsid.val[0] = ufs_swab32(sb->u.ufs_sb.s_raw_sb->fs_id[0]); - tmp.f_fsid.val[1] = ufs_swab32(sb->u.ufs_sb.s_raw_sb->fs_id[1]); - tmp.f_namelen = UFS_MAXNAMLEN; - - copy_to_user(buf, &tmp, bufsiz); + sp->f_type = sb->s_magic; + sp->f_bsize = sb->s_blocksize; + sp->f_blocks = ufs_swab32(fsb->fs_dsize); + sp->f_bfree = ufs_swab32(fsb->fs_cstotal.cs_nbfree) * + ufs_swab32(fsb->fs_frag) + + ufs_swab32(fsb->fs_cstotal.cs_nffree); + + avail = sp->f_blocks - (sp->f_blocks / 100) * + ufs_swab32(fsb->fs_minfree); + used = sp->f_blocks - sp->f_bfree; + if (avail > used) + sp->f_bavail = avail - used; + else + sp->f_bavail = 0; + + sp->f_files = sb->u.ufs_sb.s_ncg * sb->u.ufs_sb.s_ipg; + sp->f_ffree = 
ufs_swab32(fsb->fs_cstotal.cs_nifree); + sp->f_fsid.val[0] = ufs_swab32(fsb->fs_id[0]); + sp->f_fsid.val[1] = ufs_swab32(fsb->fs_id[1]); + sp->f_namelen = UFS_MAXNAMLEN; + + copy_to_user(buf, sp, bufsiz); return; } - diff --git a/fs/ufs/ufs_symlink.c b/fs/ufs/ufs_symlink.c index 35322983e..13d2285e6 100644 --- a/fs/ufs/ufs_symlink.c +++ b/fs/ufs/ufs_symlink.c @@ -6,7 +6,7 @@ * Laboratory for Computer Science Research Computing Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_symlink.c,v 1.5 1996/05/19 03:55:56 krioles Exp $ + * $Id: ufs_symlink.c,v 1.7 1997/01/26 07:14:29 davem Exp $ * */ @@ -139,7 +139,7 @@ static struct file_operations ufs_symlink_operations = { NULL, /* read */ NULL, /* write */ NULL, /* readdir */ - NULL, /* select */ + NULL, /* poll */ NULL, /* ioctl */ NULL, /* mmap */ NULL, /* open */ diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index 9e0abf55a..8f8a6bbb6 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -791,7 +791,7 @@ static struct file_operations umsdos_dir_operations = { UMSDOS_dir_read, /* read */ NULL, /* write - bad */ UMSDOS_readdir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ UMSDOS_ioctl_dir, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/umsdos/emd.c b/fs/umsdos/emd.c index 6295d0a98..0d4a89298 100644 --- a/fs/umsdos/emd.c +++ b/fs/umsdos/emd.c @@ -64,8 +64,31 @@ long umsdos_emd_dir_write (struct inode *emd_dir, unsigned long count) { int written; +#ifdef __BIG_ENDIAN + struct umsdos_dirent *d = (struct umsdos_dirent *)buf; +#endif filp->f_flags = 0; +#ifdef __BIG_ENDIAN + d->nlink = cpu_to_le16 (d->nlink); + d->uid = cpu_to_le16 (d->uid); + d->gid = cpu_to_le16 (d->gid); + d->atime = cpu_to_le32 (d->atime); + d->mtime = cpu_to_le32 (d->mtime); + d->ctime = cpu_to_le32 (d->ctime); + d->rdev = cpu_to_le16 (d->rdev); + d->mode = cpu_to_le16 (d->mode); +#endif written = umsdos_file_write_kmem (emd_dir,filp,buf,count); +#ifdef 
__BIG_ENDIAN + d->nlink = le16_to_cpu (d->nlink); + d->uid = le16_to_cpu (d->uid); + d->gid = le16_to_cpu (d->gid); + d->atime = le32_to_cpu (d->atime); + d->mtime = le32_to_cpu (d->mtime); + d->ctime = le32_to_cpu (d->ctime); + d->rdev = le16_to_cpu (d->rdev); + d->mode = le16_to_cpu (d->mode); +#endif return written != count ? -EIO : 0; } /* @@ -80,6 +103,9 @@ long umsdos_emd_dir_read (struct inode *emd_dir, { long int ret = 0; int sizeread; +#ifdef __BIG_ENDIAN + struct umsdos_dirent *d = (struct umsdos_dirent *)buf; +#endif filp->f_flags = 0; sizeread = umsdos_file_read_kmem (emd_dir,filp,buf,count); if (sizeread != count){ @@ -87,6 +113,16 @@ long umsdos_emd_dir_read (struct inode *emd_dir, ,filp->f_pos,sizeread,count); ret = -EIO; } +#ifdef __BIG_ENDIAN + d->nlink = le16_to_cpu (d->nlink); + d->uid = le16_to_cpu (d->uid); + d->gid = le16_to_cpu (d->gid); + d->atime = le32_to_cpu (d->atime); + d->mtime = le32_to_cpu (d->mtime); + d->ctime = le32_to_cpu (d->ctime); + d->rdev = le16_to_cpu (d->rdev); + d->mode = le16_to_cpu (d->mode); +#endif return ret; } diff --git a/fs/umsdos/file.c b/fs/umsdos/file.c index d01ef6f87..cda0e4e8d 100644 --- a/fs/umsdos/file.c +++ b/fs/umsdos/file.c @@ -66,7 +66,7 @@ struct file_operations umsdos_file_operations = { UMSDOS_file_read, /* read */ UMSDOS_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ generic_file_mmap, /* mmap */ NULL, /* no special open is needed */ @@ -100,7 +100,7 @@ struct file_operations umsdos_file_operations_no_bmap = { UMSDOS_file_read, /* read */ UMSDOS_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ fat_mmap, /* mmap */ NULL, /* no special open is needed */ diff --git a/fs/umsdos/inode.c b/fs/umsdos/inode.c index 8157edca6..8a00fd833 100644 --- a/fs/umsdos/inode.c +++ b/fs/umsdos/inode.c @@ -1,13 +1,14 @@ /* * 
linux/fs/umsdos/inode.c * - * Written 1993 by Jacques Gelinas + * Written 1993 by Jacques Gelinas * Inspired from linux/fs/msdos/... by Werner Almesberger * */ #include <linux/module.h> +#include <linux/init.h> #include <linux/fs.h> #include <linux/msdos_fs.h> #include <linux/kernel.h> @@ -275,7 +276,7 @@ int UMSDOS_notify_change(struct inode *inode, struct iattr *attr) { int ret = 0; - if ((ret = inode_change_ok(inode, attr)) != 0) + if ((ret = inode_change_ok(inode, attr)) != 0) return ret; if (inode->i_nlink > 0){ @@ -321,17 +322,17 @@ int UMSDOS_notify_change(struct inode *inode, struct iattr *attr) ret = umsdos_emd_dir_read (emd_owner,&filp,(char*)&entry ,UMSDOS_REC_SIZE); if (ret == 0){ - if (attr->ia_valid & ATTR_UID) + if (attr->ia_valid & ATTR_UID) entry.uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) + if (attr->ia_valid & ATTR_GID) entry.gid = attr->ia_gid; - if (attr->ia_valid & ATTR_MODE) + if (attr->ia_valid & ATTR_MODE) entry.mode = attr->ia_mode; - if (attr->ia_valid & ATTR_ATIME) + if (attr->ia_valid & ATTR_ATIME) entry.atime = attr->ia_atime; - if (attr->ia_valid & ATTR_MTIME) + if (attr->ia_valid & ATTR_MTIME) entry.mtime = attr->ia_mtime; - if (attr->ia_valid & ATTR_CTIME) + if (attr->ia_valid & ATTR_CTIME) entry.ctime = attr->ia_ctime; entry.nlink = inode->i_nlink; @@ -352,7 +353,7 @@ int UMSDOS_notify_change(struct inode *inode, struct iattr *attr) PRINTK (("\n")); } } - if (ret == 0) + if (ret == 0) inode_setattr(inode, attr); return ret; } @@ -366,7 +367,7 @@ int UMSDOS_notify_change(struct inode *inode, struct iattr *attr) so it's easier to tell them apart. */ -static struct super_operations umsdos_sops = { +static struct super_operations umsdos_sops = { UMSDOS_read_inode, UMSDOS_notify_change, UMSDOS_write_inode, @@ -431,7 +432,7 @@ struct super_block *UMSDOS_read_super( This feature allows the installation of a linux system within a DOS system in a subdirectory. 
- + A user may install its linux stuff in c:\linux avoiding any clash with existing DOS file and subdirectory. When linux boots, it hides this fact, showing a normal @@ -499,19 +500,17 @@ static struct file_system_type umsdos_fs_type = { UMSDOS_read_super, "umsdos", 1, NULL }; -int init_umsdos_fs(void) +__initfunc(int init_umsdos_fs(void)) { return register_filesystem(&umsdos_fs_type); } #ifdef MODULE +EXPORT_NO_SYMBOLS; + int init_module(void) { - int status; - - if ((status = init_umsdos_fs()) == 0) - register_symtab(0); - return status; + return init_umsdos_fs(); } void cleanup_module(void) @@ -520,4 +519,3 @@ void cleanup_module(void) } #endif - diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c index da07c0d61..a2a5364f6 100644 --- a/fs/umsdos/rdir.c +++ b/fs/umsdos/rdir.c @@ -240,7 +240,7 @@ static struct file_operations umsdos_rdir_operations = { UMSDOS_dir_read, /* read */ NULL, /* write - bad */ UMSDOS_rreaddir, /* readdir */ - NULL, /* select - default */ + NULL, /* poll - default */ UMSDOS_ioctl_dir, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open code */ diff --git a/fs/umsdos/symlink.c b/fs/umsdos/symlink.c index 7bbcc0ef0..8b6678ae9 100644 --- a/fs/umsdos/symlink.c +++ b/fs/umsdos/symlink.c @@ -116,7 +116,7 @@ static struct file_operations umsdos_symlink_operations = { NULL, /* read */ NULL, /* write */ NULL, /* readdir - bad */ - NULL, /* select - default */ + NULL, /* poll - default */ NULL, /* ioctl - default */ NULL, /* mmap */ NULL, /* no special open is needed */ diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index f0d581771..badb6c7e2 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -10,6 +10,7 @@ * the problem, send a script that demonstrates it. 
*/ +#include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> @@ -19,6 +20,7 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/mm.h> +#include <linux/init.h> #include <asm/uaccess.h> @@ -1572,34 +1574,23 @@ void vfat_read_inode(struct inode *inode) fat_read_inode(inode, &vfat_dir_inode_operations); } - - - static struct file_system_type vfat_fs_type = { vfat_read_super, "vfat", 1, NULL }; -static struct symbol_table vfat_syms = { -#include <linux/symtab_begin.h> - X(vfat_create), - X(vfat_unlink), - X(vfat_mkdir), - X(vfat_rmdir), - X(vfat_rename), - X(vfat_put_super), - X(vfat_read_super), - X(vfat_read_inode), - X(vfat_lookup), -#include <linux/symtab_end.h> -}; - -int init_vfat_fs(void) +EXPORT_SYMBOL(vfat_create); +EXPORT_SYMBOL(vfat_unlink); +EXPORT_SYMBOL(vfat_mkdir); +EXPORT_SYMBOL(vfat_rmdir); +EXPORT_SYMBOL(vfat_rename); +EXPORT_SYMBOL(vfat_put_super); +EXPORT_SYMBOL(vfat_read_super); +EXPORT_SYMBOL(vfat_read_inode); +EXPORT_SYMBOL(vfat_lookup); + +__initfunc(int init_vfat_fs(void)) { - int status; - - if ((status = register_filesystem(&vfat_fs_type)) == 0) - status = register_symtab(&vfat_syms); - return status; + return register_filesystem(&vfat_fs_type); } #ifdef MODULE @@ -1608,7 +1599,6 @@ int init_module(void) return init_vfat_fs(); } - void cleanup_module(void) { unregister_filesystem(&vfat_fs_type); diff --git a/fs/xiafs/bitmap.c b/fs/xiafs/bitmap.c deleted file mode 100644 index 15028ce85..000000000 --- a/fs/xiafs/bitmap.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * linux/fs/xiafs/bitmap.c - * - * Copyright (C) Q. Frank Xia, 1993. - * - * Based on Linus' minix/bitmap.c - * Copyright (C) Linus Torvalds, 1991, 1992. - * - * This software may be redistributed per Linux Copyright. 
- */ - -/* bitmap.c contains the code that handles the inode and block bitmaps */ - -#include <linux/sched.h> -#include <linux/locks.h> -#include <linux/xia_fs.h> -#include <linux/stat.h> -#include <linux/kernel.h> -#include <linux/string.h> - -#include <asm/bitops.h> - -#include "xiafs_mac.h" - - -char internal_error_message[]="XIA-FS: internal error %s %d\n"; - -static int find_first_zero(struct buffer_head *bh, int start_bit, int end_bit) -{ - /* This routine searches first 0 bit from (start_bit) to (end_bit-1). - * If found the bit is set to 1 and the bit # is returned, otherwise, - * -1 is returned. Race condition is avoid by using "btsl" and - * "goto repeat". ---Frank. - */ - - int end, i, j, tmp; - u_long *bmap; - - bmap=(u_long *)bh->b_data; - end = end_bit >> 5; - -repeat: - i=start_bit >> 5; - if ( (tmp=(~bmap[i]) & (0xffffffff << (start_bit & 31))) ) - goto zone_found; - while (++i < end) - if (~bmap[i]) { - tmp=~bmap[i]; - goto zone_found; - } - if ( !(tmp=~bmap[i] & ((1 << (end_bit & 31)) -1)) ) - return -1; -zone_found: - for (j=0; j < 32; j++) - if (tmp & (1 << j)) - break; - if (set_bit(j,bmap+i)) { - start_bit=j + (i << 5) + 1; - goto repeat; - } - mark_buffer_dirty(bh, 1); - return j + (i << 5); -} - -static void clear_buf(struct buffer_head * bh) -{ - register int i; - register long * lp; - - lp=(long *)bh->b_data; - for (i= bh->b_size >> 2; i-- > 0; ) - *lp++=0; -} - -static void que(struct buffer_head * bmap[], int bznr[], int pos) -{ - struct buffer_head * tbh; - int tmp; - int i; - - tbh=bmap[pos]; - tmp=bznr[pos]; - for (i=pos; i > 0; i--) { - bmap[i]=bmap[i-1]; - bznr[i]=bznr[i-1]; - } - bmap[0]=tbh; - bznr[0]=tmp; -} - -#define get_imap_zone(sb, bit_nr, not_que) \ - get__map_zone((sb), (sb)->u.xiafs_sb.s_imap_buf, \ - (sb)->u.xiafs_sb.s_imap_iznr, \ - (sb)->u.xiafs_sb.s_imap_cached, 1, \ - (sb)->u.xiafs_sb.s_imap_zones, _XIAFS_IMAP_SLOTS, \ - bit_nr, not_que) - -#define get_zmap_zone(sb, bit_nr, not_que) \ - get__map_zone((sb), 
(sb)->u.xiafs_sb.s_zmap_buf, \ - (sb)->u.xiafs_sb.s_zmap_zznr, \ - (sb)->u.xiafs_sb.s_zmap_cached, \ - 1+(sb)->u.xiafs_sb.s_imap_zones, \ - (sb)->u.xiafs_sb.s_zmap_zones, _XIAFS_ZMAP_SLOTS, \ - bit_nr, not_que) - -static struct buffer_head * -get__map_zone(struct super_block *sb, struct buffer_head * bmap_buf[], - int bznr[], u_char cache, int first_zone, - int bmap_zones, int slots, u_long bit_nr, int * not_que) -{ - struct buffer_head * tmp_bh; - int z_nr, i; - - z_nr = bit_nr >> XIAFS_BITS_PER_Z_BITS(sb); - if (z_nr >= bmap_zones) { - printk("XIA-FS: bad inode/zone number (%s %d)\n", WHERE_ERR); - return NULL; - } - if (!cache) - return bmap_buf[z_nr]; - lock_super(sb); - for (i=0; i < slots; i++) - if (bznr[i]==z_nr) - break; - if (i < slots) { /* cache hit */ - if (not_que) { - *not_que=i; - return bmap_buf[i]; - } else { - que(bmap_buf, bznr, i); - return bmap_buf[0]; - } - } - tmp_bh=bread(sb->s_dev, z_nr+first_zone, XIAFS_ZSIZE(sb)); /* cache not hit */ - if (!tmp_bh) { - printk("XIA-FS: read bitmap failed (%s %d)\n", WHERE_ERR); - unlock_super(sb); - return NULL; - } - brelse(bmap_buf[slots-1]); - bmap_buf[slots-1]=tmp_bh; - bznr[slots-1]=z_nr; - if (not_que) - *not_que=slots-1; - else - que(bmap_buf, bznr, slots-1); - return tmp_bh; -} - -#define xiafs_unlock_super(sb, cache) if (cache) unlock_super(sb); - -#define get_free_ibit(sb, prev_bit) \ - get_free__bit(sb, sb->u.xiafs_sb.s_imap_buf, \ - sb->u.xiafs_sb.s_imap_iznr, \ - sb->u.xiafs_sb.s_imap_cached, \ - 1, sb->u.xiafs_sb.s_imap_zones, \ - _XIAFS_IMAP_SLOTS, prev_bit); - -#define get_free_zbit(sb, prev_bit) \ - get_free__bit(sb, sb->u.xiafs_sb.s_zmap_buf, \ - sb->u.xiafs_sb.s_zmap_zznr, \ - sb->u.xiafs_sb.s_zmap_cached, \ - 1 + sb->u.xiafs_sb.s_imap_zones, \ - sb->u.xiafs_sb.s_zmap_zones, \ - _XIAFS_ZMAP_SLOTS, prev_bit); - -static u_long -get_free__bit(struct super_block *sb, struct buffer_head * bmap_buf[], - int bznr[], u_char cache, int first_zone, int bmap_zones, - int slots, u_long prev_bit) -{ 
- struct buffer_head * bh; - int not_done=0; - u_long pos, start_bit, end_bit, total_bits; - int z_nr, tmp; - - total_bits=bmap_zones << XIAFS_BITS_PER_Z_BITS(sb); - if (prev_bit >= total_bits) - prev_bit=0; - pos=prev_bit+1; - end_bit=XIAFS_BITS_PER_Z(sb); - - do { - if (pos >= total_bits) - pos=0; - if (!not_done) { /* first time */ - not_done=1; - start_bit= pos & (end_bit-1); - } else - start_bit=0; - if ( pos < prev_bit && pos+end_bit >= prev_bit) { /* last time */ - not_done=0; - end_bit=prev_bit & (end_bit-1); /* only here end_bit modified */ - } - bh = get__map_zone(sb, bmap_buf, bznr, cache, first_zone, - bmap_zones, slots, pos, &z_nr); - if (!bh) - return 0; - tmp=find_first_zero(bh, start_bit, end_bit); - if (tmp >= 0) - break; - xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached); - pos=(pos & ~(end_bit-1))+end_bit; - } while (not_done); - - if (tmp < 0) - return 0; - if (cache) - que(bmap_buf, bznr, z_nr); - xiafs_unlock_super(sb, cache); - return (pos & ~(XIAFS_BITS_PER_Z(sb)-1))+tmp; -} - -void xiafs_free_zone(struct super_block * sb, int d_addr) -{ - struct buffer_head * bh; - unsigned int bit, offset; - - if (!sb) { - printk(INTERN_ERR); - return; - } - if (d_addr < sb->u.xiafs_sb.s_firstdatazone || - d_addr >= sb->u.xiafs_sb.s_nzones) { - printk("XIA-FS: bad zone number (%s %d)\n", WHERE_ERR); - return; - } - bh = get_hash_table(sb->s_dev, d_addr, XIAFS_ZSIZE(sb)); - if (bh) - mark_buffer_clean(bh); - brelse(bh); - bit=d_addr - sb->u.xiafs_sb.s_firstdatazone + 1; - bh = get_zmap_zone(sb, bit, NULL); - if (!bh) - return; - offset = bit & (XIAFS_BITS_PER_Z(sb) -1); - if (!clear_bit(offset, bh->b_data)) - printk("XIA-FS: dev %s" - " block bit %u (0x%x) already cleared (%s %d)\n", - kdevname(sb->s_dev), bit, bit, WHERE_ERR); - mark_buffer_dirty(bh, 1); - xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached); -} - -int xiafs_new_zone(struct super_block * sb, u_long prev_addr) -{ - struct buffer_head * bh; - int prev_znr, tmp; - - if (!sb) { - 
printk(INTERN_ERR); - return 0; - } - if (prev_addr < sb->u.xiafs_sb.s_firstdatazone || - prev_addr >= sb->u.xiafs_sb.s_nzones) { - prev_addr=sb->u.xiafs_sb.s_firstdatazone; - } - prev_znr=prev_addr-sb->u.xiafs_sb.s_firstdatazone+1; - tmp=get_free_zbit(sb, prev_znr); - if (!tmp) - return 0; - tmp += sb->u.xiafs_sb.s_firstdatazone -1; - if (!(bh = getblk(sb->s_dev, tmp, XIAFS_ZSIZE(sb)))) { - printk("XIA-FS: I/O error (%s %d)\n", WHERE_ERR); - return 0; - } - if (bh->b_count != 1) { - printk(INTERN_ERR); - return 0; - } - clear_buf(bh); - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 1); - brelse(bh); - return tmp; -} - -void xiafs_free_inode(struct inode * inode) -{ - struct buffer_head * bh; - struct super_block * sb; - unsigned long ino; - - if (!inode) - return; - if (!inode->i_dev || inode->i_count!=1 - || inode->i_nlink || !inode->i_sb || inode->i_ino < 3 - || inode->i_ino > inode->i_sb->u.xiafs_sb.s_ninodes) { - printk("XIA-FS: bad inode (%s %d)\n", WHERE_ERR); - return; - } - sb = inode->i_sb; - ino = inode->i_ino; - bh = get_imap_zone(sb, ino, NULL); - if (!bh) - return; - clear_inode(inode); - if (!clear_bit(ino & (XIAFS_BITS_PER_Z(sb)-1), bh->b_data)) - printk("XIA-FS: dev %s" - "inode bit %ld (0x%lx) already cleared (%s %d)\n", - kdevname(inode->i_dev), ino, ino, WHERE_ERR); - mark_buffer_dirty(bh, 1); - xiafs_unlock_super(sb, sb->u.xiafs_sb.s_imap_cached); -} - -struct inode * xiafs_new_inode(struct inode * dir) -{ - struct super_block * sb; - struct inode * inode; - ino_t tmp; - - sb = dir->i_sb; - if (!dir || !(inode = get_empty_inode())) - return NULL; - inode->i_sb = sb; - inode->i_flags = inode->i_sb->s_flags; - - tmp=get_free_ibit(sb, dir->i_ino); - if (!tmp) { - iput(inode); - return NULL; - } - inode->i_count = 1; - inode->i_nlink = 1; - inode->i_dev = sb->s_dev; - inode->i_uid = current->fsuid; - inode->i_gid = (dir->i_mode & S_ISGID) ? 
dir->i_gid : current->fsgid; - inode->i_dirt = 1; - inode->i_ino = tmp; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = NULL; - inode->i_blocks = 0; - inode->i_blksize = XIAFS_ZSIZE(inode->i_sb); - insert_inode_hash(inode); - return inode; -} - -static int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; - -static u_long count_zone(struct buffer_head * bh) -{ - int i, tmp; - u_long sum; - - sum=0; - for (i=bh->b_size; i-- > 0; ) { - tmp=bh->b_data[i]; - sum += nibblemap[tmp & 0xf] + nibblemap[(tmp & 0xff) >> 4]; - } - return sum; -} - -unsigned long xiafs_count_free_inodes(struct super_block *sb) -{ - struct buffer_head * bh; - int izones, i, not_que; - u_long sum; - - sum=0; - izones=sb->u.xiafs_sb.s_imap_zones; - for (i=0; i < izones; i++) { - bh=get_imap_zone(sb, i << XIAFS_BITS_PER_Z_BITS(sb), ¬_que); - if (bh) { - sum += count_zone(bh); - xiafs_unlock_super(sb, sb->u.xiafs_sb.s_imap_cached); - } - } - i=izones << XIAFS_BITS_PER_Z_BITS(sb); - return i - sum; -} - -unsigned long xiafs_count_free_zones(struct super_block *sb) -{ - struct buffer_head * bh; - int zzones, i, not_que; - u_long sum; - - sum=0; - zzones=sb->u.xiafs_sb.s_zmap_zones; - for (i=0; i < zzones; i++) { - bh=get_zmap_zone(sb, i << XIAFS_BITS_PER_Z_BITS(sb), ¬_que); - if (bh) { - sum += count_zone(bh); - xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached); - } - } - i=zzones << XIAFS_BITS_PER_Z_BITS(sb); - return i - sum; -} diff --git a/fs/xiafs/dir.c b/fs/xiafs/dir.c deleted file mode 100644 index 856d3cdcd..000000000 --- a/fs/xiafs/dir.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * linux/fs/xiafs/dir.c - * - * Copyright (C) Q. Frank Xia, 1993. - * - * Based on Linus' minix/dir.c - * Copyright (C) Linus Torvalds, 1991, 1992. - * - * This software may be redistributed per Linux Copyright. 
- */ - -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/xia_fs.h> -#include <linux/stat.h> - -#include <asm/uaccess.h> - -#include "xiafs_mac.h" - -static long xiafs_dir_read(struct inode *, struct file *, char *, unsigned long); -static int xiafs_readdir(struct inode *, struct file *, void *, filldir_t); - -static struct file_operations xiafs_dir_operations = { - NULL, /* lseek - default */ - xiafs_dir_read, /* read */ - NULL, /* write - bad */ - xiafs_readdir, /* readdir */ - NULL, /* select - default */ - NULL, /* ioctl - default */ - NULL, /* mmap */ - NULL, /* no special open code */ - NULL, /* no special release code */ - file_fsync /* default fsync */ -}; - -/* - * directories can handle most operations... - */ -struct inode_operations xiafs_dir_inode_operations = { - &xiafs_dir_operations, /* default directory file-ops */ - xiafs_create, /* create */ - xiafs_lookup, /* lookup */ - xiafs_link, /* link */ - xiafs_unlink, /* unlink */ - xiafs_symlink, /* symlink */ - xiafs_mkdir, /* mkdir */ - xiafs_rmdir, /* rmdir */ - xiafs_mknod, /* mknod */ - xiafs_rename, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - xiafs_truncate, /* truncate */ - NULL /* permission */ -}; - -static long xiafs_dir_read(struct inode * inode, struct file * filp, - char * buf, unsigned long count) -{ - return -EISDIR; -} - -static int xiafs_readdir(struct inode * inode, struct file * filp, - void * dirent, filldir_t filldir) -{ - u_int offset, i; - struct buffer_head * bh; - struct xiafs_direct * de; - - if (!inode || !inode->i_sb || !S_ISDIR(inode->i_mode)) - return -EBADF; - if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb) - 1) ) - return -EBADF; - while (filp->f_pos < inode->i_size) { - offset = filp->f_pos & (XIAFS_ZSIZE(inode->i_sb) - 1); - bh = xiafs_bread(inode, filp->f_pos >> XIAFS_ZSIZE_BITS(inode->i_sb),0); - if (!bh) { - 
filp->f_pos += XIAFS_ZSIZE(inode->i_sb)-offset; - continue; - } - for (i = 0; i < XIAFS_ZSIZE(inode->i_sb) && i < offset; ) { - de = (struct xiafs_direct *) (bh->b_data + i); - if (!de->d_rec_len) - break; - i += de->d_rec_len; - } - offset = i; - de = (struct xiafs_direct *) (offset + bh->b_data); - - while (offset < XIAFS_ZSIZE(inode->i_sb) && filp->f_pos < inode->i_size) { - if (de->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes || - de->d_rec_len < 12 || - (char *)de+de->d_rec_len > XIAFS_ZSIZE(inode->i_sb)+bh->b_data || - de->d_name_len < 1 || de->d_name_len + 8 > de->d_rec_len || - de->d_name_len > _XIAFS_NAME_LEN || - de->d_name[de->d_name_len] ) { - printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR); - brelse(bh); - return 0; - } - if (de->d_ino) { - if (!IS_RDONLY (inode)) { - inode->i_atime=CURRENT_TIME; - inode->i_dirt=1; - } - if (filldir(dirent, de->d_name, de->d_name_len, filp->f_pos, de->d_ino) < 0) { - brelse(bh); - return 0; - } - } - offset += de->d_rec_len; - filp->f_pos += de->d_rec_len; - de = (struct xiafs_direct *) (offset + bh->b_data); - } - brelse(bh); - if (offset > XIAFS_ZSIZE(inode->i_sb)) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - return 0; - } - } - if (!IS_RDONLY (inode)) { - inode->i_atime=CURRENT_TIME; - inode->i_dirt=1; - } - return 0; -} diff --git a/fs/xiafs/file.c b/fs/xiafs/file.c deleted file mode 100644 index 822b4b520..000000000 --- a/fs/xiafs/file.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * linux/fs/xiafs/file.c - * - * Copyright (C) Q. Frank Xia, 1993. - * - * Based on Linus' minix/file.c - * Copyright (C) Linus Torvalds, 1991, 1992. - * - * This software may be redistributed per Linux Copyright. 
- */ - -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/xia_fs.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/locks.h> -#include <linux/pagemap.h> - -#include <asm/uaccess.h> -#include <asm/system.h> - -#include "xiafs_mac.h" - -#define NBUF 32 - -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) - -static long xiafs_file_read(struct inode *, struct file *, char *, unsigned long); -static long xiafs_file_write(struct inode *, struct file *, const char *, unsigned long); - -/* - * We have mostly NULL's here: the current defaults are ok for - * the xiafs filesystem. - */ -static struct file_operations xiafs_file_operations = { - NULL, /* lseek - default */ - xiafs_file_read, /* read */ - xiafs_file_write, /* write */ - NULL, /* readdir - bad */ - NULL, /* select - default */ - NULL, /* ioctl - default */ - generic_file_mmap, /* mmap */ - NULL, /* no special open is needed */ - NULL, /* release */ - xiafs_sync_file /* fsync */ -}; - -struct inode_operations xiafs_file_inode_operations = { - &xiafs_file_operations, /* default file operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - generic_readpage, /* readpage */ - NULL, /* writepage */ - xiafs_bmap, /* bmap */ - xiafs_truncate, /* truncate */ - NULL /* permission */ -}; - -static long -xiafs_file_read(struct inode * inode, struct file * filp, char * buf, unsigned long count) -{ - int read, left, chars; - int zone_nr, zones, f_zones, offset; - int bhrequest, uptodate; - struct buffer_head ** bhb, ** bhe; - struct buffer_head * bhreq[NBUF]; - struct buffer_head * buflist[NBUF]; - - if (!inode) { - printk("XIA-FS: inode = NULL (%s %d)\n", WHERE_ERR); - return -EINVAL; - } - if (!S_ISREG(inode->i_mode)) { - 
printk("XIA-FS: mode != regular (%s %d)\n", WHERE_ERR); - return -EINVAL; - } - offset = filp->f_pos; - left = inode->i_size - offset; - if (left > count) - left = count; - if (left <= 0) - return 0; - read = 0; - zone_nr = offset >> XIAFS_ZSIZE_BITS(inode->i_sb); - offset &= XIAFS_ZSIZE(inode->i_sb) -1 ; - f_zones =(inode->i_size+XIAFS_ZSIZE(inode->i_sb)-1)>>XIAFS_ZSIZE_BITS(inode->i_sb); - zones = (left+offset+XIAFS_ZSIZE(inode->i_sb)-1) >> XIAFS_ZSIZE_BITS(inode->i_sb); - bhb = bhe = buflist; - if (filp->f_reada) { - if(zones < read_ahead[MAJOR(inode->i_dev)] >> (1+XIAFS_ZSHIFT(inode->i_sb))) - zones = read_ahead[MAJOR(inode->i_dev)] >> (1+XIAFS_ZSHIFT(inode->i_sb)); - if (zone_nr + zones > f_zones) - zones = f_zones - zone_nr; - } - - /* We do this in a two stage process. We first try to request - as many blocks as we can, then we wait for the first one to - complete, and then we try to wrap up as many as are actually - done. This routine is rather generic, in that it can be used - in a filesystem by substituting the appropriate function in - for getblk. - - This routine is optimized to make maximum use of the various - buffers and caches. */ - - do { - bhrequest = 0; - uptodate = 1; - while (zones--) { - *bhb = xiafs_getblk(inode, zone_nr++, 0); - if (*bhb && !buffer_uptodate(*bhb)) { - uptodate = 0; - bhreq[bhrequest++] = *bhb; - } - - if (++bhb == &buflist[NBUF]) - bhb = buflist; - - /* If the block we have on hand is uptodate, go ahead - and complete processing. */ - if (uptodate) - break; - if (bhb == bhe) - break; - } - - /* Now request them all */ - if (bhrequest) - ll_rw_block(READ, bhrequest, bhreq); - - do { /* Finish off all I/O that has actually completed */ - if (*bhe) { - wait_on_buffer(*bhe); - if (!buffer_uptodate(*bhe)) { /* read error? 
*/ - brelse(*bhe); - if (++bhe == &buflist[NBUF]) - bhe = buflist; - left = 0; - break; - } - } - if (left < XIAFS_ZSIZE(inode->i_sb) - offset) - chars = left; - else - chars = XIAFS_ZSIZE(inode->i_sb) - offset; - filp->f_pos += chars; - left -= chars; - read += chars; - if (*bhe) { - copy_to_user(buf,offset+(*bhe)->b_data,chars); - brelse(*bhe); - buf += chars; - } else { - while (chars-->0) - put_user(0,buf++); - } - offset = 0; - if (++bhe == &buflist[NBUF]) - bhe = buflist; - } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe))); - } while (left > 0); - -/* Release the read-ahead blocks */ - while (bhe != bhb) { - brelse(*bhe); - if (++bhe == &buflist[NBUF]) - bhe = buflist; - }; - if (!read) - return -EIO; - filp->f_reada = 1; - if (!IS_RDONLY (inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } - return read; -} - -static long -xiafs_file_write(struct inode * inode, struct file * filp, const char * buf, unsigned long count) -{ - off_t pos; - int written, c; - struct buffer_head * bh; - char * cp; - - if (!inode) { - printk("XIA-FS: inode = NULL (%s %d)\n", WHERE_ERR); - return -EINVAL; - } - if (!S_ISREG(inode->i_mode)) { - printk("XIA-FS: mode != regular (%s %d)\n", WHERE_ERR); - return -EINVAL; - } -/* - * ok, append may not work when many processes are writing at the same time - * but so what. That way leads to madness anyway. 
- */ - if (filp->f_flags & O_APPEND) - pos = inode->i_size; - else - pos = filp->f_pos; - written = 0; - while (written < count) { - bh = xiafs_getblk(inode, pos >> XIAFS_ZSIZE_BITS(inode->i_sb), 1); - if (!bh) { - if (!written) - written = -ENOSPC; - break; - } - c = XIAFS_ZSIZE(inode->i_sb) - (pos & (XIAFS_ZSIZE(inode->i_sb) - 1)); - if (c > count-written) - c = count-written; - if (c != XIAFS_ZSIZE(inode->i_sb) && !buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - if (!written) - written = -EIO; - break; - } - } - cp = (pos & (XIAFS_ZSIZE(inode->i_sb)-1)) + bh->b_data; - copy_from_user(cp,buf,c); - update_vm_cache(inode,pos,cp,c); - pos += c; - if (pos > inode->i_size) { - inode->i_size = pos; - inode->i_dirt = 1; - } - written += c; - buf += c; - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 0); - brelse(bh); - } - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - filp->f_pos = pos; - inode->i_dirt = 1; - - return written; -} diff --git a/fs/xiafs/fsync.c b/fs/xiafs/fsync.c deleted file mode 100644 index f491e3d8e..000000000 --- a/fs/xiafs/fsync.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * linux/fs/xiafs/fsync.c - * - * Changes Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) - * from - * Copyright (C) 1991, 1992 Linus Torvalds - * - * xiafs fsync primitive - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/locks.h> - -#include <linux/fs.h> -#include <linux/xia_fs.h> - -#include <asm/uaccess.h> -#include <asm/system.h> - -#include "xiafs_mac.h" - - -#define blocksize (XIAFS_ZSIZE(inode->i_sb)) -#define addr_per_block (XIAFS_ADDRS_PER_Z(inode->i_sb)) - -static int sync_block (struct inode * inode, unsigned long * block, int wait) -{ - struct buffer_head * bh; - int tmp; - - if (!*block) - return 0; - tmp = *block; - bh = get_hash_table(inode->i_dev, *block, blocksize); - if (!bh) - return 0; - if (*block != 
tmp) { - brelse (bh); - return 1; - } - if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { - brelse(bh); - return -1; - } - if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) - { - brelse(bh); - return 0; - } - ll_rw_block(WRITE, 1, &bh); - bh->b_count--; - return 0; -} - -static int sync_iblock (struct inode * inode, unsigned long * iblock, - struct buffer_head **bh, int wait) -{ - int rc, tmp; - - *bh = NULL; - tmp = *iblock; - if (!tmp) - return 0; - rc = sync_block (inode, iblock, wait); - if (rc) - return rc; - *bh = bread(inode->i_dev, tmp, blocksize); - if (tmp != *iblock) { - brelse(*bh); - *bh = NULL; - return 1; - } - if (!*bh) - return -1; - return 0; -} - - -static int sync_direct(struct inode *inode, int wait) -{ - int i; - int rc, err = 0; - - for (i = 0; i < 8; i++) { - rc = sync_block (inode, inode->u.ext_i.i_data + i, wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - return err; -} - -static int sync_indirect(struct inode *inode, unsigned long *iblock, int wait) -{ - int i; - struct buffer_head * ind_bh; - int rc, err = 0; - - rc = sync_iblock (inode, iblock, &ind_bh, wait); - if (rc || !ind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_block (inode, - ((unsigned long *) ind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(ind_bh); - return err; -} - -static int sync_dindirect(struct inode *inode, unsigned long *diblock, - int wait) -{ - int i; - struct buffer_head * dind_bh; - int rc, err = 0; - - rc = sync_iblock (inode, diblock, &dind_bh, wait); - if (rc || !dind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_indirect (inode, - ((unsigned long *) dind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(dind_bh); - return err; -} - -int xiafs_sync_file(struct inode * inode, struct file * file) -{ - int wait, err = 0; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return 
-EINVAL; - for (wait=0; wait<=1; wait++) - { - err |= sync_direct(inode, wait); - err |= sync_indirect(inode, &inode->u.xiafs_i.i_ind_zone, wait); - err |= sync_dindirect(inode, &inode->u.xiafs_i.i_dind_zone, wait); - } - err |= xiafs_sync_inode (inode); - return (err < 0) ? -EIO : 0; -} diff --git a/fs/xiafs/inode.c b/fs/xiafs/inode.c deleted file mode 100644 index 48b31e972..000000000 --- a/fs/xiafs/inode.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * linux/fs/xiafs/inode.c - * - * Copyright (C) Q. Frank Xia, 1993. - * - * Based on Linus' minix/inode.c - * Copyright (C) Linus Torvalds, 1991, 1992. - * - * This software may be redistributed per Linux Copyright. - */ - -#include <linux/module.h> - -#include <linux/sched.h> -#include <linux/xia_fs.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/locks.h> -#include <asm/system.h> -#include <asm/uaccess.h> - -#include "xiafs_mac.h" - -static u_long random_nr; - -void xiafs_put_inode(struct inode *inode) -{ - if (inode->i_nlink) - return; - inode->i_size = 0; - xiafs_truncate(inode); - xiafs_free_inode(inode); -} - -void xiafs_put_super(struct super_block *sb) -{ - int i; - - lock_super(sb); - sb->s_dev = 0; - for(i = 0 ; i < _XIAFS_IMAP_SLOTS ; i++) - brelse(sb->u.xiafs_sb.s_imap_buf[i]); - for(i = 0 ; i < _XIAFS_ZMAP_SLOTS ; i++) - brelse(sb->u.xiafs_sb.s_zmap_buf[i]); - unlock_super(sb); - MOD_DEC_USE_COUNT; -} - -static struct super_operations xiafs_sops = { - xiafs_read_inode, - NULL, - xiafs_write_inode, - xiafs_put_inode, - xiafs_put_super, - NULL, - xiafs_statfs, - NULL -}; - -struct super_block *xiafs_read_super(struct super_block *s, void *data, - int silent) -{ - struct buffer_head *bh; - struct xiafs_super_block *sp; - int i, z; - kdev_t dev; - - MOD_INC_USE_COUNT; - dev = s->s_dev; - lock_super(s); - - set_blocksize(dev, BLOCK_SIZE); - - if (!(bh = bread(dev, 0, BLOCK_SIZE))) { - s->s_dev = 0; - unlock_super(s); - printk("XIA-FS: read 
super_block failed (%s %d)\n", WHERE_ERR); - MOD_DEC_USE_COUNT; - return NULL; - } - sp = (struct xiafs_super_block *) bh->b_data; - s->s_magic = sp->s_magic; - if (s->s_magic != _XIAFS_SUPER_MAGIC) { - s->s_dev = 0; - unlock_super(s); - brelse(bh); - if (!silent) - printk("VFS: Can't find a xiafs filesystem on dev " - "%s.\n", kdevname(dev)); - MOD_DEC_USE_COUNT; - return NULL; - } - s->s_blocksize = sp->s_zone_size; - s->s_blocksize_bits = 10 + sp->s_zone_shift; - if (s->s_blocksize != BLOCK_SIZE && - (s->s_blocksize == 1024 || s->s_blocksize == 2048 || - s->s_blocksize == 4096)) { - brelse(bh); - set_blocksize(dev, s->s_blocksize); - bh = bread (dev, 0, s->s_blocksize); - if(!bh) { - MOD_DEC_USE_COUNT; - return NULL; - } - sp = (struct xiafs_super_block *) (((char *)bh->b_data) + BLOCK_SIZE) ; - }; - s->u.xiafs_sb.s_nzones = sp->s_nzones; - s->u.xiafs_sb.s_ninodes = sp->s_ninodes; - s->u.xiafs_sb.s_ndatazones = sp->s_ndatazones; - s->u.xiafs_sb.s_imap_zones = sp->s_imap_zones; - s->u.xiafs_sb.s_zmap_zones = sp->s_zmap_zones; - s->u.xiafs_sb.s_firstdatazone = sp->s_firstdatazone; - s->u.xiafs_sb.s_zone_shift = sp->s_zone_shift; - s->u.xiafs_sb.s_max_size = sp->s_max_size; - brelse(bh); - for (i=0;i < _XIAFS_IMAP_SLOTS;i++) { - s->u.xiafs_sb.s_imap_buf[i] = NULL; - s->u.xiafs_sb.s_imap_iznr[i] = -1; - } - for (i=0;i < _XIAFS_ZMAP_SLOTS;i++) { - s->u.xiafs_sb.s_zmap_buf[i] = NULL; - s->u.xiafs_sb.s_zmap_zznr[i] = -1; - } - z=1; - if ( s->u.xiafs_sb.s_imap_zones > _XIAFS_IMAP_SLOTS ) - s->u.xiafs_sb.s_imap_cached=1; - else { - s->u.xiafs_sb.s_imap_cached=0; - for (i=0 ; i < s->u.xiafs_sb.s_imap_zones ; i++) { - if (!(s->u.xiafs_sb.s_imap_buf[i]=bread(dev, z++, XIAFS_ZSIZE(s)))) - goto xiafs_read_super_fail; - s->u.xiafs_sb.s_imap_iznr[i]=i; - } - } - if ( s->u.xiafs_sb.s_zmap_zones > _XIAFS_ZMAP_SLOTS ) - s->u.xiafs_sb.s_zmap_cached=1; - else { - s->u.xiafs_sb.s_zmap_cached=0; - for (i=0 ; i < s->u.xiafs_sb.s_zmap_zones ; i++) { - if 
(!(s->u.xiafs_sb.s_zmap_buf[i]=bread(dev, z++, XIAFS_ZSIZE(s)))) - goto xiafs_read_super_fail; - s->u.xiafs_sb.s_zmap_zznr[i]=i; - } - } - /* set up enough so that it can read an inode */ - s->s_dev = dev; - s->s_op = &xiafs_sops; - s->s_mounted = iget(s, _XIAFS_ROOT_INO); - if (!s->s_mounted) - goto xiafs_read_super_fail; - unlock_super(s); - random_nr=CURRENT_TIME; - return s; - -xiafs_read_super_fail: - for(i=0; i < _XIAFS_IMAP_SLOTS; i++) - brelse(s->u.xiafs_sb.s_imap_buf[i]); - for(i=0; i < _XIAFS_ZMAP_SLOTS; i++) - brelse(s->u.xiafs_sb.s_zmap_buf[i]); - s->s_dev = 0; - unlock_super(s); - printk("XIA-FS: read bitmaps failed (%s %d)\n", WHERE_ERR); - MOD_DEC_USE_COUNT; - return NULL; -} - -void xiafs_statfs(struct super_block *sb, struct statfs *buf, int bufsiz) -{ - struct statfs tmp; - - tmp.f_type = _XIAFS_SUPER_MAGIC; - tmp.f_bsize = XIAFS_ZSIZE(sb); - tmp.f_blocks = sb->u.xiafs_sb.s_ndatazones; - tmp.f_bfree = xiafs_count_free_zones(sb); - tmp.f_bavail = tmp.f_bfree; - tmp.f_files = sb->u.xiafs_sb.s_ninodes; - tmp.f_ffree = xiafs_count_free_inodes(sb); - tmp.f_namelen = _XIAFS_NAME_LEN; - copy_to_user(buf, &tmp, bufsiz); -} - -static int zone_bmap(struct buffer_head * bh, int nr) -{ - int tmp; - - if (!bh) - return 0; - tmp = ((u_long *) bh->b_data)[nr]; - brelse(bh); - return tmp; -} - -int xiafs_bmap(struct inode * inode,int zone) -{ - int i; - - if (zone < 0) { - printk("XIA-FS: block < 0 (%s %d)\n", WHERE_ERR); - return 0; - } - if (zone >= 8+(1+XIAFS_ADDRS_PER_Z(inode->i_sb))*XIAFS_ADDRS_PER_Z(inode->i_sb)) { - printk("XIA-FS: zone > big (%s %d)\n", WHERE_ERR); - return 0; - } - if (!IS_RDONLY (inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } - if (zone < 8) - return inode->u.xiafs_i.i_zone[zone]; - zone -= 8; - if (zone < XIAFS_ADDRS_PER_Z(inode->i_sb)) { - i = inode->u.xiafs_i.i_ind_zone; - if (i) - i = zone_bmap(bread(inode->i_dev, i, XIAFS_ZSIZE(inode->i_sb)), zone); - return i; - } - zone -= XIAFS_ADDRS_PER_Z(inode->i_sb); - i 
= inode->u.xiafs_i.i_dind_zone; - if (i) - i = zone_bmap(bread(inode->i_dev, i, XIAFS_ZSIZE(inode->i_sb)), - zone >> XIAFS_ADDRS_PER_Z_BITS(inode->i_sb)); - if (i) - i= zone_bmap(bread(inode->i_dev,i, XIAFS_ZSIZE(inode->i_sb)), - zone & (XIAFS_ADDRS_PER_Z(inode->i_sb)-1)); - return i; -} - -static u_long get_prev_addr(struct inode * inode, int zone) -{ - u_long tmp; - - if (zone > 0) - while (--zone >= 0) /* only files with holes suffer */ - if ((tmp=xiafs_bmap(inode, zone))) - return tmp; - random_nr=(random_nr+23)%inode->i_sb->u.xiafs_sb.s_ndatazones; - return random_nr + inode->i_sb->u.xiafs_sb.s_firstdatazone; -} - -static struct buffer_head * -dt_getblk(struct inode * inode, u_long *lp, int create, u_long prev_addr) -{ - int tmp; - struct buffer_head * result; - -repeat: - if ((tmp=*lp)) { - result = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (tmp == *lp) - return result; - brelse(result); - goto repeat; - } - if (!create) - return NULL; - tmp = xiafs_new_zone(inode->i_sb, prev_addr); - if (!tmp) - return NULL; - result = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (*lp) { - xiafs_free_zone(inode->i_sb, tmp); - brelse(result); - goto repeat; - } - *lp = tmp; - inode->i_blocks+=2 << XIAFS_ZSHIFT(inode->i_sb); - return result; -} - -static struct buffer_head * -indt_getblk(struct inode * inode, struct buffer_head * bh, - int nr, int create, u_long prev_addr) -{ - int tmp; - u_long *lp; - struct buffer_head * result; - - if (!bh) - return NULL; - if (!buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - return NULL; - } - } - lp = nr + (u_long *) bh->b_data; -repeat: - if ((tmp=*lp)) { - result = getblk(bh->b_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (tmp == *lp) { - brelse(bh); - return result; - } - brelse(result); - goto repeat; - } - if (!create) { - brelse(bh); - return NULL; - } - tmp = xiafs_new_zone(inode->i_sb, prev_addr); - if (!tmp) { - brelse(bh); - return 
NULL; - } - result = getblk(bh->b_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (*lp) { - xiafs_free_zone(inode->i_sb, tmp); - brelse(result); - goto repeat; - } - *lp = tmp; - inode->i_blocks+=2 << XIAFS_ZSHIFT(inode->i_sb); - mark_buffer_dirty(bh, 1); - brelse(bh); - return result; -} - -struct buffer_head * xiafs_getblk(struct inode * inode, int zone, int create) -{ - struct buffer_head * bh; - u_long prev_addr=0; - - if (zone<0) { - printk("XIA-FS: zone < 0 (%s %d)\n", WHERE_ERR); - return NULL; - } - if (zone >= 8+(1+XIAFS_ADDRS_PER_Z(inode->i_sb))*XIAFS_ADDRS_PER_Z(inode->i_sb)) { - if (!create) - printk("XIA-FS: zone > big (%s %d)\n", WHERE_ERR); - return NULL; - } - if (create) - prev_addr=get_prev_addr(inode, zone); - if (zone < 8) - return dt_getblk(inode, zone+inode->u.xiafs_i.i_zone, create, prev_addr); - zone -= 8; - if (zone < XIAFS_ADDRS_PER_Z(inode->i_sb)) { - bh = dt_getblk(inode, &(inode->u.xiafs_i.i_ind_zone), create, prev_addr); - bh = indt_getblk(inode, bh, zone, create, prev_addr); - return bh; - } - zone -= XIAFS_ADDRS_PER_Z(inode->i_sb); - bh = dt_getblk(inode, &(inode->u.xiafs_i.i_dind_zone), create, prev_addr); - bh = indt_getblk(inode, bh, zone>>XIAFS_ADDRS_PER_Z_BITS(inode->i_sb), - create, prev_addr); - bh = indt_getblk(inode, bh, zone&(XIAFS_ADDRS_PER_Z(inode->i_sb)-1), - create, prev_addr); - return bh; -} - -struct buffer_head * xiafs_bread(struct inode * inode, int zone, int create) -{ - struct buffer_head * bh; - - bh = xiafs_getblk(inode, zone, create); - if (!bh || buffer_uptodate(bh)) - return bh; - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return bh; - brelse(bh); - return NULL; -} - -void xiafs_read_inode(struct inode * inode) -{ - struct buffer_head * bh; - struct xiafs_inode * raw_inode; - int zone; - ino_t ino; - - ino = inode->i_ino; - inode->i_op = NULL; - inode->i_mode=0; - if (!ino || ino > inode->i_sb->u.xiafs_sb.s_ninodes) { - printk("XIA-FS: bad inode number (%s %d)\n", WHERE_ERR); - 
return; - } - zone = 1 + inode->i_sb->u.xiafs_sb.s_imap_zones + - inode->i_sb->u.xiafs_sb.s_zmap_zones + - (ino-1)/ XIAFS_INODES_PER_Z(inode->i_sb); - if (!(bh=bread(inode->i_dev, zone, XIAFS_ZSIZE(inode->i_sb)))) { - printk("XIA-FS: read i-node zone failed (%s %d)\n", WHERE_ERR); - return; - } - raw_inode = ((struct xiafs_inode *) bh->b_data) + - ((ino-1) & (XIAFS_INODES_PER_Z(inode->i_sb) - 1)); - inode->i_mode = raw_inode->i_mode; - inode->i_uid = raw_inode->i_uid; - inode->i_gid = raw_inode->i_gid; - inode->i_nlink = raw_inode->i_nlinks; - inode->i_size = raw_inode->i_size; - inode->i_mtime = raw_inode->i_mtime; - inode->i_atime = raw_inode->i_atime; - inode->i_ctime = raw_inode->i_ctime; - inode->i_blksize = XIAFS_ZSIZE(inode->i_sb); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - inode->i_blocks=0; - inode->i_rdev = to_kdev_t(raw_inode->i_zone[0]); - } else { - XIAFS_GET_BLOCKS(raw_inode, inode->i_blocks); - for (zone = 0; zone < 8; zone++) - inode->u.xiafs_i.i_zone[zone] = raw_inode->i_zone[zone] & 0xffffff; - inode->u.xiafs_i.i_ind_zone = raw_inode->i_ind_zone & 0xffffff; - inode->u.xiafs_i.i_dind_zone = raw_inode->i_dind_zone & 0xffffff; - } - brelse(bh); - if (S_ISREG(inode->i_mode)) - inode->i_op = &xiafs_file_inode_operations; - else if (S_ISDIR(inode->i_mode)) - inode->i_op = &xiafs_dir_inode_operations; - else if (S_ISLNK(inode->i_mode)) - inode->i_op = &xiafs_symlink_inode_operations; - else if (S_ISCHR(inode->i_mode)) - inode->i_op = &chrdev_inode_operations; - else if (S_ISBLK(inode->i_mode)) - inode->i_op = &blkdev_inode_operations; - else if (S_ISFIFO(inode->i_mode)) - init_fifo(inode); -} - -static struct buffer_head * xiafs_update_inode(struct inode * inode) -{ - struct buffer_head * bh; - struct xiafs_inode * raw_inode; - int zone; - ino_t ino; - - if (IS_RDONLY (inode)) { - printk("XIA-FS: write_inode on a read-only filesystem (%s %d)\n", WHERE_ERR); - inode->i_dirt = 0; - return 0; - } - - ino = inode->i_ino; - if (!ino || ino > 
inode->i_sb->u.xiafs_sb.s_ninodes) { - printk("XIA-FS: bad inode number (%s %d)\n", WHERE_ERR); - inode->i_dirt=0; - return 0; - } - zone = 1 + inode->i_sb->u.xiafs_sb.s_imap_zones + - inode->i_sb->u.xiafs_sb.s_zmap_zones + - (ino-1) / XIAFS_INODES_PER_Z(inode->i_sb); - if (!(bh=bread(inode->i_dev, zone, XIAFS_ZSIZE(inode->i_sb)))) { - printk("XIA-FS: read i-node zone failed (%s %d)\n", WHERE_ERR); - inode->i_dirt=0; - return 0; - } - raw_inode = ((struct xiafs_inode *)bh->b_data) + - ((ino-1) & (XIAFS_INODES_PER_Z(inode->i_sb) -1)); - raw_inode->i_mode = inode->i_mode; - raw_inode->i_uid = inode->i_uid; - raw_inode->i_gid = inode->i_gid; - raw_inode->i_nlinks = inode->i_nlink; - raw_inode->i_size = inode->i_size; - raw_inode->i_atime = inode->i_atime; - raw_inode->i_ctime = inode->i_ctime; - raw_inode->i_mtime = inode->i_mtime; - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - raw_inode->i_zone[0] = kdev_t_to_nr(inode->i_rdev); - else { - XIAFS_PUT_BLOCKS(raw_inode, inode->i_blocks); - for (zone = 0; zone < 8; zone++) - raw_inode->i_zone[zone] = (raw_inode->i_zone[zone] & 0xff000000) - | (inode->u.xiafs_i.i_zone[zone] & 0xffffff); - raw_inode->i_ind_zone = (raw_inode->i_ind_zone & 0xff000000) - | (inode->u.xiafs_i.i_ind_zone & 0xffffff); - raw_inode->i_dind_zone = (raw_inode->i_dind_zone & 0xff000000) - | (inode->u.xiafs_i.i_dind_zone & 0xffffff); - } - inode->i_dirt=0; - mark_buffer_dirty(bh, 1); - return bh; -} - - -void xiafs_write_inode(struct inode * inode) -{ - struct buffer_head * bh; - bh = xiafs_update_inode(inode); - brelse (bh); -} - -int xiafs_sync_inode (struct inode *inode) -{ - int err = 0; - struct buffer_head *bh; - - bh = xiafs_update_inode(inode); - if (bh && buffer_dirty(bh)) - { - ll_rw_block(WRITE, 1, &bh); - wait_on_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { - printk ("IO error syncing xiafs inode [%s:%lu]\n", - kdevname(inode->i_dev), inode->i_ino); - err = -1; - } - } - else if (!bh) - err = -1; - brelse (bh); - 
return err; -} - -/* Every kernel module contains stuff like this. */ - -static struct file_system_type xiafs_fs_type = { - xiafs_read_super, "xiafs", 1, NULL -}; - -int init_xiafs_fs(void) -{ - return register_filesystem(&xiafs_fs_type); -} - -#ifdef MODULE -int init_module(void) -{ - int status; - - if ((status = init_xiafs_fs()) == 0) - register_symtab(0); - return status; -} - -void cleanup_module(void) -{ - unregister_filesystem(&xiafs_fs_type); -} - -#endif diff --git a/fs/xiafs/namei.c b/fs/xiafs/namei.c deleted file mode 100644 index b23c4bf71..000000000 --- a/fs/xiafs/namei.c +++ /dev/null @@ -1,854 +0,0 @@ -/* - * Linux/fs/xiafs/namei.c - * - * Copyright (C) Q. Frank Xia, 1993. - * - * Based on Linus' minix/namei.c - * Copyright (C) Linus Torvalds, 1991, 1992. - * - * This software may be redistributed per Linux Copyright. - */ - -#include <linux/sched.h> -#include <linux/xia_fs.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/errno.h> - -#include <asm/uaccess.h> - -#include "xiafs_mac.h" - -#define RNDUP4(x) ((3+(u_long)(x)) & ~3) -/* - * ok, we cannot use strncmp, as the name is not in our data space. - * Thus we'll have to use xiafs_match. No big problem. Match also makes - * some sanity tests. - * - * NOTE! unlike strncmp, xiafs_match returns 1 for success, 0 for failure. - */ -static int xiafs_match(int len, const char * name, struct xiafs_direct * dep) -{ - int i; - - if (!dep || !dep->d_ino || len > _XIAFS_NAME_LEN) - return 0; - /* "" means "." ---> so paths like "/usr/lib//libc.a" work */ - if (!len && (dep->d_name[0]=='.') && (dep->d_name[1]=='\0')) - return 1; - if (len != dep->d_name_len) - return 0; - for (i=0; i < len; i++) - if (*name++ != dep->d_name[i]) - return 0; - return 1; -} - -/* - * xiafs_find_entry() - * - * finds an entry in the specified directory with the wanted name. 
It - * returns the cache buffer in which the entry was found, and the entry - * itself (as a parameter - res_dir). It does NOT read the inode of the - * entry - you'll have to do that yourself if you want to. - */ -static struct buffer_head * -xiafs_find_entry(struct inode * inode, const char * name, int namelen, - struct xiafs_direct ** res_dir, struct xiafs_direct ** res_pre) -{ - int i, zones, pos; - struct buffer_head * bh; - struct xiafs_direct * dep, * dep_pre; - - *res_dir = NULL; - if (!inode) - return NULL; - if (namelen > _XIAFS_NAME_LEN) - return NULL; - - if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb) - 1)) { - printk("XIA-FS: bad dir size (%s %d)\n", WHERE_ERR); - return NULL; - } - zones=inode->i_size >> XIAFS_ZSIZE_BITS(inode->i_sb); - for (i=0; i < zones; i++ ) { - bh = xiafs_bread(inode, i, 0); - if (!bh) - continue; - dep_pre=dep=(struct xiafs_direct *)bh->b_data; - if (!i && (dep->d_rec_len != 12 || !dep->d_ino || - dep->d_name_len != 1 || strcmp(dep->d_name, "."))) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - brelse(bh); - return NULL; - } - pos = 0; - while ( pos < XIAFS_ZSIZE(inode->i_sb) ) { - if (dep->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes || - dep->d_rec_len < 12 || - dep->d_rec_len+(char *)dep > bh->b_data+XIAFS_ZSIZE(inode->i_sb) || - dep->d_name_len + 8 > dep->d_rec_len || dep->d_name_len <= 0 || - dep->d_name[dep->d_name_len] ) { - brelse(bh); - return NULL; - } - if (xiafs_match(namelen, name, dep)) { - *res_dir=dep; - if (res_pre) - *res_pre=dep_pre; - return bh; - } - pos += dep->d_rec_len; - dep_pre=dep; - dep=(struct xiafs_direct *)(bh->b_data + pos); - } - brelse(bh); - if (pos > XIAFS_ZSIZE(inode->i_sb)) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - return NULL; - } - } - return NULL; -} - -int xiafs_lookup(struct inode * dir, const char * name, int len, - struct inode ** result) -{ - int ino; - struct xiafs_direct * dep; - struct buffer_head * bh; - - *result = NULL; - if (!dir) - return -ENOENT; - 
if (!S_ISDIR(dir->i_mode)) { - iput(dir); - return -ENOENT; - } - if (!(bh = xiafs_find_entry(dir, name, len, &dep, NULL))) { - iput(dir); - return -ENOENT; - } - ino = dep->d_ino; - brelse(bh); - if (!(*result = iget(dir->i_sb, ino))) { - iput(dir); - return -EACCES; - } - iput(dir); - return 0; -} - -/* - * xiafs_add_entry() - * - * adds a file entry to the specified directory, using the same - * semantics as xiafs_find_entry(). It returns NULL if it failed. - * - * NOTE!! The inode part of 'de' is left at 0 - which means you - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -static struct buffer_head * xiafs_add_entry(struct inode * dir, - const char * name, int namelen, struct xiafs_direct ** res_dir, - struct xiafs_direct ** res_pre) -{ - int i, pos, offset; - struct buffer_head * bh; - struct xiafs_direct * de, * de_pre; - - *res_dir = NULL; - if (!dir || !namelen || namelen > _XIAFS_NAME_LEN) - return NULL; - - if (dir->i_size & (XIAFS_ZSIZE(dir->i_sb) - 1)) { - printk("XIA-FS: bad dir size (%s %d)\n", WHERE_ERR); - return NULL; - } - pos=0; - for ( ; ; ) { - bh = xiafs_bread(dir, pos >> XIAFS_ZSIZE_BITS(dir->i_sb), pos ? 
1:0); - if (!bh) - return NULL; - de_pre=de=(struct xiafs_direct *)bh->b_data; - if (!pos) { - if (de->d_rec_len != 12 || !de->d_ino || de->d_name_len != 1 || - strcmp(de->d_name, ".")) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - brelse(bh); - return NULL; - } - offset = 12; - de_pre=de=(struct xiafs_direct *)(bh->b_data+12); - } else - offset = 0; - while (offset < XIAFS_ZSIZE(dir->i_sb)) { - if (pos >= dir->i_size) { - de->d_ino=0; - de->d_name_len=0; - de->d_name[0]=0; - de->d_rec_len=XIAFS_ZSIZE(dir->i_sb); - dir->i_size += XIAFS_ZSIZE(dir->i_sb); - dir->i_dirt = 1; - } else { - if (de->d_ino > dir->i_sb->u.xiafs_sb.s_ninodes || - de->d_rec_len < 12 || - (char *)de+de->d_rec_len > bh->b_data+XIAFS_ZSIZE(dir->i_sb) || - de->d_name_len + 8 > de->d_rec_len || - de->d_name[de->d_name_len]) { - printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR); - brelse(bh); - return NULL; - } - if (de->d_ino && - RNDUP4(de->d_name_len)+RNDUP4(namelen)+16<=de->d_rec_len) { - i=RNDUP4(de->d_name_len)+8; - de_pre=de; - de=(struct xiafs_direct *)(i+(u_char *)de_pre); - de->d_ino=0; - de->d_rec_len=de_pre->d_rec_len-i; - de_pre->d_rec_len=i; - } - } - if (!de->d_ino && RNDUP4(namelen)+8 <= de->d_rec_len) { - /* - * XXX all times should be set by caller upon successful - * completion. 
- */ - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - dir->i_dirt = 1; - memcpy(de->d_name, name, namelen); - de->d_name[namelen]=0; - de->d_name_len=namelen; - mark_buffer_dirty(bh, 1); - *res_dir = de; - if (res_pre) - *res_pre = de_pre; - return bh; - } - offset+=de->d_rec_len; - de_pre=de; - de=(struct xiafs_direct *)(bh->b_data+offset); - } - brelse(bh); - if (offset > XIAFS_ZSIZE(dir->i_sb)) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - return NULL; - } - pos+=XIAFS_ZSIZE(dir->i_sb); - } - return NULL; -} - -int xiafs_create(struct inode * dir, const char * name, int len, int mode, - struct inode ** result) -{ - struct inode * inode; - struct buffer_head * bh; - struct xiafs_direct * de; - - *result = NULL; - if (!dir) - return -ENOENT; - inode = xiafs_new_inode(dir); - if (!inode) { - iput(dir); - return -ENOSPC; - } - inode->i_op = &xiafs_file_inode_operations; - inode->i_mode = mode; - inode->i_dirt = 1; - bh = xiafs_add_entry(dir, name, len, &de, NULL); - if (!bh) { - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - iput(dir); - return -ENOSPC; - } - de->d_ino = inode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - *result = inode; - return 0; -} - -int xiafs_mknod(struct inode *dir, const char *name, int len, int mode, int rdev) -{ - struct inode * inode; - struct buffer_head * bh; - struct xiafs_direct * de; - - if (!dir) - return -ENOENT; - bh = xiafs_find_entry(dir,name,len,&de, NULL); - if (bh) { - brelse(bh); - iput(dir); - return -EEXIST; - } - inode = xiafs_new_inode(dir); - if (!inode) { - iput(dir); - return -ENOSPC; - } - inode->i_uid = current->fsuid; - inode->i_mode = mode; - inode->i_op = NULL; - if (S_ISREG(inode->i_mode)) - inode->i_op = &xiafs_file_inode_operations; - else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &xiafs_dir_inode_operations; - if (dir->i_mode & S_ISGID) - inode->i_mode |= S_ISGID; - } - else if (S_ISLNK(inode->i_mode)) - inode->i_op = &xiafs_symlink_inode_operations; - else if 
(S_ISCHR(inode->i_mode)) - inode->i_op = &chrdev_inode_operations; - else if (S_ISBLK(inode->i_mode)) - inode->i_op = &blkdev_inode_operations; - else if (S_ISFIFO(inode->i_mode)) - init_fifo(inode); - if (S_ISBLK(mode) || S_ISCHR(mode)) - inode->i_rdev = to_kdev_t(rdev); - inode->i_atime = inode->i_ctime = inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - bh = xiafs_add_entry(dir, name, len, &de, NULL); - if (!bh) { - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - iput(dir); - return -ENOSPC; - } - de->d_ino = inode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - iput(inode); - return 0; -} - -int xiafs_mkdir(struct inode * dir, const char * name, int len, int mode) -{ - struct inode * inode; - struct buffer_head * bh, *dir_block; - struct xiafs_direct * de; - - bh = xiafs_find_entry(dir,name,len,&de, NULL); - if (bh) { - brelse(bh); - iput(dir); - return -EEXIST; - } - if (dir->i_nlink > 64000) { - iput(dir); - return -EMLINK; - } - inode = xiafs_new_inode(dir); - if (!inode) { - iput(dir); - return -ENOSPC; - } - inode->i_op = &xiafs_dir_inode_operations; - inode->i_size = XIAFS_ZSIZE(dir->i_sb); - inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME; - inode->i_dirt = 1; - dir_block = xiafs_bread(inode,0,1); - if (!dir_block) { - iput(dir); - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - return -ENOSPC; - } - de = (struct xiafs_direct *) dir_block->b_data; - de->d_ino=inode->i_ino; - strcpy(de->d_name,"."); - de->d_name_len=1; - de->d_rec_len=12; - de =(struct xiafs_direct *)(12 + dir_block->b_data); - de->d_ino = dir->i_ino; - strcpy(de->d_name,".."); - de->d_name_len=2; - de->d_rec_len=XIAFS_ZSIZE(dir->i_sb)-12; - inode->i_nlink = 2; - mark_buffer_dirty(dir_block, 1); - brelse(dir_block); - inode->i_mode = S_IFDIR | (mode & S_IRWXUGO & ~current->fs->umask); - if (dir->i_mode & S_ISGID) - inode->i_mode |= S_ISGID; - inode->i_dirt = 1; - bh = xiafs_add_entry(dir, name, len, &de, NULL); - if (!bh) { - iput(dir); 
- inode->i_nlink=0; - iput(inode); - return -ENOSPC; - } - de->d_ino = inode->i_ino; - mark_buffer_dirty(bh, 1); - dir->i_nlink++; - dir->i_dirt = 1; - iput(dir); - iput(inode); - brelse(bh); - return 0; -} - -/* - * routine to check that the specified directory is empty (for rmdir) - */ -static int empty_dir(struct inode * inode) -{ - int i, zones, offset; - struct buffer_head * bh; - struct xiafs_direct * de; - - if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb)-1) ) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - return 1; - } - - zones=inode->i_size >> XIAFS_ZSIZE_BITS(inode->i_sb); - for (i=0; i < zones; i++) { - bh = xiafs_bread(inode, i, 0); - if (!i) { - if (!bh) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - return 1; - } - de=(struct xiafs_direct *)bh->b_data; - if (de->d_ino != inode->i_ino || strcmp(".", de->d_name) || - de->d_rec_len != 12 ) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - brelse(bh); - return 1; - } - de=(struct xiafs_direct *)(12 + bh->b_data); - if (!de->d_ino || strcmp("..", de->d_name)) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - brelse(bh); - return 1; - } - offset=de->d_rec_len+12; - } - else - offset = 0; - if (!bh) - continue; - while (offset < XIAFS_ZSIZE(inode->i_sb)) { - de=(struct xiafs_direct *)(bh->b_data+offset); - if (de->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes || - de->d_rec_len < 12 || - (char *)de+de->d_rec_len > bh->b_data+XIAFS_ZSIZE(inode->i_sb) || - de->d_name_len + 8 > de->d_rec_len || - de->d_name[de->d_name_len]) { - printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); - brelse(bh); - return 1; - } - if (de->d_ino) { - brelse(bh); - return 0; - } - offset+=de->d_rec_len; - } - brelse(bh); - } - return 1; -} - -static void xiafs_rm_entry(struct xiafs_direct *de, struct xiafs_direct * de_pre) -{ - if (de==de_pre) { - de->d_ino=0; - return; - } - while (de_pre->d_rec_len+(u_char *)de_pre < (u_char *)de) { - if (de_pre->d_rec_len < 12) { - printk("XIA-FS: bad 
directory entry (%s %d)\n", WHERE_ERR); - return; - } - de_pre=(struct xiafs_direct *)(de_pre->d_rec_len+(u_char *)de_pre); - } - if (de_pre->d_rec_len+(u_char *)de_pre > (u_char *)de) { - printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR); - return; - } - de_pre->d_rec_len+=de->d_rec_len; -} - -int xiafs_rmdir(struct inode * dir, const char * name, int len) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct xiafs_direct * de, * de_pre; - - inode = NULL; - bh = xiafs_find_entry(dir, name, len, &de, &de_pre); - retval = -ENOENT; - if (!bh) - goto end_rmdir; - retval = -EPERM; - if (!(inode = iget(dir->i_sb, de->d_ino))) - goto end_rmdir; - if ((dir->i_mode & S_ISVTX) && !fsuser() && - current->fsuid != inode->i_uid && - current->fsuid != dir->i_uid) - goto end_rmdir; - if (inode->i_dev != dir->i_dev) - goto end_rmdir; - if (inode == dir) /* we may not delete ".", but "../dir" is ok */ - goto end_rmdir; - if (!S_ISDIR(inode->i_mode)) { - retval = -ENOTDIR; - goto end_rmdir; - } - if (!empty_dir(inode)) { - retval = -ENOTEMPTY; - goto end_rmdir; - } - if (inode->i_count > 1) { - retval = -EBUSY; - goto end_rmdir; - } - if (inode->i_nlink != 2) - printk("XIA-FS: empty directory has nlink!=2 (%s %d)\n", WHERE_ERR); - xiafs_rm_entry(de, de_pre); - mark_buffer_dirty(bh, 1); - inode->i_nlink=0; - inode->i_dirt=1; - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - dir->i_dirt=1; - retval = 0; -end_rmdir: - iput(dir); - iput(inode); - brelse(bh); - return retval; -} - -int xiafs_unlink(struct inode * dir, const char * name, int len) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct xiafs_direct * de, * de_pre; - -repeat: - retval = -ENOENT; - inode = NULL; - bh = xiafs_find_entry(dir, name, len, &de, &de_pre); - if (!bh) - goto end_unlink; - if (!(inode = iget(dir->i_sb, de->d_ino))) - goto end_unlink; - retval = -EPERM; - if (S_ISDIR(inode->i_mode)) - goto end_unlink; - if 
(de->d_ino != inode->i_ino) { - iput(inode); - brelse(bh); - current->counter = 0; - schedule(); - goto repeat; - } - if ((dir->i_mode & S_ISVTX) && !fsuser() && - current->fsuid != inode->i_uid && - current->fsuid != dir->i_uid) - goto end_unlink; - if (!inode->i_nlink) { - printk("XIA-FS: Deleting nonexistent file (%s %d)\n", WHERE_ERR); - inode->i_nlink=1; - } - xiafs_rm_entry(de, de_pre); - mark_buffer_dirty(bh, 1); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - dir->i_dirt = 1; - inode->i_nlink--; - inode->i_dirt = 1; - retval = 0; -end_unlink: - brelse(bh); - iput(inode); - iput(dir); - return retval; -} - -int xiafs_symlink(struct inode * dir, const char * name, - int len, const char * symname) -{ - struct xiafs_direct * de; - struct inode * inode = NULL; - struct buffer_head * bh = NULL, * name_block = NULL; - int i; - char c; - - bh = xiafs_find_entry(dir,name,len, &de, NULL); - if (bh) { - brelse(bh); - iput(dir); - return -EEXIST; - } - if (!(inode = xiafs_new_inode(dir))) { - iput(dir); - return -ENOSPC; - } - inode->i_mode = S_IFLNK | S_IRWXUGO; - inode->i_op = &xiafs_symlink_inode_operations; - name_block = xiafs_bread(inode,0,1); - if (!name_block) { - iput(dir); - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - return -ENOSPC; - } - for (i = 0; i < BLOCK_SIZE-1 && (c=*symname++); i++) - name_block->b_data[i] = c; - name_block->b_data[i] = 0; - mark_buffer_dirty(name_block, 1); - brelse(name_block); - inode->i_size = i; - inode->i_dirt = 1; - bh = xiafs_add_entry(dir, name, len, &de, NULL); - if (!bh) { - inode->i_nlink--; - inode->i_dirt = 1; - iput(inode); - iput(dir); - return -ENOSPC; - } - de->d_ino = inode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - iput(inode); - return 0; -} - -int xiafs_link(struct inode * oldinode, struct inode * dir, - const char * name, int len) -{ - struct xiafs_direct * de; - struct buffer_head * bh; - - if (S_ISDIR(oldinode->i_mode)) { - iput(oldinode); - iput(dir); - 
return -EPERM; - } - if (oldinode->i_nlink > 64000) { - iput(oldinode); - iput(dir); - return -EMLINK; - } - bh = xiafs_find_entry(dir, name, len, &de, NULL); - if (bh) { - brelse(bh); - iput(dir); - iput(oldinode); - return -EEXIST; - } - bh = xiafs_add_entry(dir, name, len, &de, NULL); - if (!bh) { - iput(dir); - iput(oldinode); - return -ENOSPC; - } - de->d_ino = oldinode->i_ino; - mark_buffer_dirty(bh, 1); - brelse(bh); - iput(dir); - oldinode->i_nlink++; - oldinode->i_ctime = CURRENT_TIME; - oldinode->i_dirt = 1; - iput(oldinode); - return 0; -} - -static int subdir(struct inode * new_inode, struct inode * old_inode) -{ - int ino; - int result; - - new_inode->i_count++; - result = 0; - for (;;) { - if (new_inode == old_inode) { - result = 1; - break; - } - if (new_inode->i_dev != old_inode->i_dev) - break; - ino = new_inode->i_ino; - if (xiafs_lookup(new_inode,"..",2,&new_inode)) - break; - if (new_inode->i_ino == ino) - break; - } - iput(new_inode); - return result; -} - -#define PARENT_INO(buffer) \ - (((struct xiafs_direct *) ((u_char *)(buffer) + 12))->d_ino) - -/* - * rename uses retry to avoid race-conditions: at least they should be minimal. - * it tries to allocate all the blocks, then sanity-checks, and if the sanity- - * checks fail, it tries to restart itself again. Very practical - no changes - * are done until we know everything works ok.. and then all the changes can be - * done in one fell swoop when we have claimed all the buffers needed. - * - * Anybody can rename anything with this: the permission checks are left to the - * higher-level routines. 
- */ -static int do_xiafs_rename(struct inode * old_dir, const char * old_name, - int old_len, struct inode * new_dir, - const char * new_name, int new_len, - int must_be_dir) -{ - struct inode * old_inode, * new_inode; - struct buffer_head * old_bh, * new_bh, * dir_bh; - struct xiafs_direct * old_de, * old_de_pre, * new_de, * new_de_pre; - int retval; - -try_again: - old_inode = new_inode = NULL; - old_bh = new_bh = dir_bh = NULL; - old_bh = xiafs_find_entry(old_dir, old_name, old_len, &old_de, &old_de_pre); - retval = -ENOENT; - if (!old_bh) - goto end_rename; - old_inode = __iget(old_dir->i_sb, old_de->d_ino, 0); /* don't cross mnt-points */ - if (!old_inode) - goto end_rename; - if (must_be_dir && !S_ISDIR(old_inode->i_mode)) - goto end_rename; - retval = -EPERM; - if ((old_dir->i_mode & S_ISVTX) && - current->fsuid != old_inode->i_uid && - current->fsuid != old_dir->i_uid && !fsuser()) - goto end_rename; - new_bh = xiafs_find_entry(new_dir, new_name, new_len, &new_de, NULL); - if (new_bh) { - new_inode = __iget(new_dir->i_sb, new_de->d_ino, 0); - if (!new_inode) { - brelse(new_bh); - new_bh = NULL; - } - } - if (new_inode == old_inode) { - retval = 0; - goto end_rename; - } - if (new_inode && S_ISDIR(new_inode->i_mode)) { - retval = -EEXIST; - goto end_rename; - } - retval = -EPERM; - if (new_inode && (new_dir->i_mode & S_ISVTX) && - current->fsuid != new_inode->i_uid && - current->fsuid != new_dir->i_uid && !fsuser()) - goto end_rename; - if (S_ISDIR(old_inode->i_mode)) { - retval = -EEXIST; - if (new_bh) - goto end_rename; - if ((retval = permission(old_inode, MAY_WRITE)) != 0) - goto end_rename; - retval = -EINVAL; - if (subdir(new_dir, old_inode)) - goto end_rename; - retval = -EIO; - dir_bh = xiafs_bread(old_inode,0,0); - if (!dir_bh) - goto end_rename; - if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; - if (new_dir->i_nlink > 64000) - goto end_rename; - } - if (!new_bh) - new_bh = xiafs_add_entry(new_dir, 
new_name, new_len, &new_de, &new_de_pre); - retval = -ENOSPC; - if (!new_bh) - goto end_rename; - /* sanity checking */ - if ( (new_inode && (new_de->d_ino != new_inode->i_ino)) - || (new_de->d_ino && !new_inode) - || (old_de->d_ino != old_inode->i_ino)) { - xiafs_rm_entry(new_de, new_de_pre); - brelse(old_bh); - brelse(new_bh); - brelse(dir_bh); - iput(old_inode); - iput(new_inode); - current->counter=0; - schedule(); - goto try_again; - } - xiafs_rm_entry(old_de, old_de_pre); - new_de->d_ino = old_inode->i_ino; - if (new_inode) { - new_inode->i_nlink--; - new_inode->i_dirt = 1; - } - mark_buffer_dirty(old_bh, 1); - mark_buffer_dirty(new_bh, 1); - if (dir_bh) { - PARENT_INO(dir_bh->b_data) = new_dir->i_ino; - mark_buffer_dirty(dir_bh, 1); - old_dir->i_nlink--; - new_dir->i_nlink++; - old_dir->i_dirt = 1; - new_dir->i_dirt = 1; - } - retval = 0; -end_rename: - brelse(dir_bh); - brelse(old_bh); - brelse(new_bh); - iput(old_inode); - iput(new_inode); - iput(old_dir); - iput(new_dir); - return retval; -} - -/* - * Ok, rename also locks out other renames, as they can change the parent of - * a directory, and we don't want any races. Other races are checked for by - * "do_rename()", which restarts if there are inconsistencies. - * - * Note that there is no race between different filesystems: it's only within - * the same device that races occur: many renames can happen at once, as long - * as they are on different partitions. 
- */ -int xiafs_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len, - int must_be_dir) -{ - static struct wait_queue * wait = NULL; - static int lock = 0; - int result; - - while (lock) - sleep_on(&wait); - lock = 1; - result = do_xiafs_rename(old_dir, old_name, old_len, - new_dir, new_name, new_len, - must_be_dir); - lock = 0; - wake_up(&wait); - return result; -} diff --git a/fs/xiafs/symlink.c b/fs/xiafs/symlink.c deleted file mode 100644 index 1803ae457..000000000 --- a/fs/xiafs/symlink.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * linux/fs/xiafs/symlink.c - * - * Copyright (C) Q. Frank Xia, 1993. - * - * Based on Linus' minix/symlink.c - * Copyright (C) Linus Torvalds, 1991, 1992. - * - * This software may be redistributed per Linux Copyright. - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/xia_fs.h> -#include <linux/stat.h> - -#include <asm/uaccess.h> - -static int -xiafs_readlink(struct inode *, char *, int); - -static int -xiafs_follow_link(struct inode *, struct inode *, int, int, struct inode **); - -/* - * symlinks can't do much... 
- */ -struct inode_operations xiafs_symlink_inode_operations = { - NULL, /* no file-operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - xiafs_readlink, /* readlink */ - xiafs_follow_link, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ -}; - -static int xiafs_readlink(struct inode * inode, char * buffer, int buflen) -{ - struct buffer_head * bh; - int i; - char c; - - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } - if (buflen > BLOCK_SIZE) - buflen = BLOCK_SIZE; - bh = xiafs_bread(inode, 0, 0); - if (!IS_RDONLY (inode)) { - inode->i_atime=CURRENT_TIME; - inode->i_dirt=1; - } - iput(inode); - if (!bh) - return 0; - for (i=0; i < buflen && (c=bh->b_data[i]); i++) - put_user(c, buffer++); - if (i < buflen-1) - put_user('\0', buffer); - brelse(bh); - return i; -} - -static int xiafs_follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - int error; - struct buffer_head * bh; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if (!IS_RDONLY (inode)) { - inode->i_atime=CURRENT_TIME; - inode->i_dirt=1; - } - if (current->link_count > 5) { - iput(inode); - iput(dir); - return -ELOOP; - } - if (!(bh = xiafs_bread(inode, 0, 0))) { - iput(inode); - iput(dir); - return -EIO; - } - iput(inode); - current->link_count++; - error = open_namei(bh->b_data,flag,mode,res_inode,dir); - current->link_count--; - brelse(bh); - return error; -} - - - diff --git a/fs/xiafs/truncate.c b/fs/xiafs/truncate.c deleted file mode 100644 index bdb9d39be..000000000 --- a/fs/xiafs/truncate.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * 
linux/fs/xiafs/truncate.c - * - * Copyright (C) Q. Frank Xia, 1993. - * - * Based on Linus' minix/truncate.c - * Copyright (C) Linus Torvalds, 1991, 1992. - * - * This software may be redistributed per Linux Copyright. - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/xia_fs.h> -#include <linux/stat.h> -#include <linux/fcntl.h> - -#include "xiafs_mac.h" - -/* - * Linus' comment: - * - * Truncate has the most races in the whole filesystem: coding it is - * a pain in the a**. Especially as I don't do any locking... - * - * The code may look a bit weird, but that's just because I've tried to - * handle things like file-size changes in a somewhat graceful manner. - * Anyway, truncating a file at the same time somebody else writes to it - * is likely to result in pretty weird behaviour... - * - * The new code handles normal truncates (size = 0) as well as the more - * general case (size = XXX). I hope. - */ - -#define DT_ZONE ((inode->i_size + XIAFS_ZSIZE(inode->i_sb) - 1) \ - >> XIAFS_ZSIZE_BITS(inode->i_sb) ) - -static int trunc_direct(struct inode * inode) -{ - u_long * lp; - struct buffer_head * bh; - int i, tmp; - int retry = 0; - -repeat: - for (i = DT_ZONE ; i < 8 ; i++) { - if (i < DT_ZONE) - goto repeat; - lp=i + inode->u.xiafs_i.i_zone; - if (!(tmp = *lp)) - continue; - bh = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (i < DT_ZONE) { - brelse(bh); - goto repeat; - } - if ((bh && bh->b_count != 1) || tmp != *lp) - retry = 1; - else { - *lp = 0; - inode->i_dirt = 1; - inode->i_blocks-=2 << XIAFS_ZSHIFT(inode->i_sb); - xiafs_free_zone(inode->i_sb, tmp); - } - brelse(bh); - } - return retry; -} - -static int trunc_indirect(struct inode * inode, int addr_off, u_long * lp) -{ - -#define INDT_ZONE (DT_ZONE - addr_off) - - struct buffer_head * bh, * ind_bh; - int i, tmp; - u_long * indp; - int retry = 0; - - if ( !(tmp=*lp) ) - return 0; - ind_bh = bread(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (tmp != *lp) { - 
brelse(ind_bh); - return 1; - } - if (!ind_bh) { - *lp = 0; - return 0; - } -repeat: - for (i = INDT_ZONE<0?0:INDT_ZONE; i < XIAFS_ADDRS_PER_Z(inode->i_sb); i++) { - if (i < INDT_ZONE) - goto repeat; - indp = i+(u_long *) ind_bh->b_data; - if (!(tmp=*indp)) - continue; - bh = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (i < INDT_ZONE) { - brelse(bh); - goto repeat; - } - if ((bh && bh->b_count != 1) || tmp != *indp) - retry = 1; - else { - *indp = 0; - mark_buffer_dirty(ind_bh, 1); - inode->i_blocks-= 2 << XIAFS_ZSHIFT(inode->i_sb); - xiafs_free_zone(inode->i_sb, tmp); - } - brelse(bh); - } - indp = (u_long *) ind_bh->b_data; - for (i = 0; i < XIAFS_ADDRS_PER_Z(inode->i_sb) && !(*indp++); i++) ; - if (i >= XIAFS_ADDRS_PER_Z(inode->i_sb)) { - if (ind_bh->b_count != 1) - retry = 1; - else { - tmp = *lp; - *lp = 0; - inode->i_blocks-= 2 << XIAFS_ZSHIFT(inode->i_sb); - xiafs_free_zone(inode->i_sb, tmp); - } - } - brelse(ind_bh); - return retry; -} - -static int trunc_dindirect(struct inode * inode) -{ - -#define DINDT_ZONE \ - ((DT_ZONE-XIAFS_ADDRS_PER_Z(inode->i_sb)-8)>>XIAFS_ADDRS_PER_Z_BITS(inode->i_sb)) - - int i, tmp; - struct buffer_head * dind_bh; - u_long * dindp, * lp; - int retry = 0; - - lp = &(inode->u.xiafs_i.i_dind_zone); - if (!(tmp = *lp)) - return 0; - dind_bh = bread(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); - if (tmp != *lp) { - brelse(dind_bh); - return 1; - } - if (!dind_bh) { - *lp = 0; - return 0; - } -repeat: - for (i=DINDT_ZONE<0?0:DINDT_ZONE ; i < XIAFS_ADDRS_PER_Z(inode->i_sb) ; i ++) { - if (i < DINDT_ZONE) - goto repeat; - dindp = i+(u_long *) dind_bh->b_data; - retry |= trunc_indirect(inode, - 8+((1+i)<<XIAFS_ADDRS_PER_Z_BITS(inode->i_sb)), - dindp); - mark_buffer_dirty(dind_bh, 1); - } - dindp = (u_long *) dind_bh->b_data; - for (i = 0; i < XIAFS_ADDRS_PER_Z(inode->i_sb) && !(*dindp++); i++); - if (i >= XIAFS_ADDRS_PER_Z(inode->i_sb)) { - if (dind_bh->b_count != 1) - retry = 1; - else { - tmp = *lp; - *lp = 0; - 
inode->i_dirt = 1; - inode->i_blocks-=2 << XIAFS_ZSHIFT(inode->i_sb); - xiafs_free_zone(inode->i_sb, tmp); - } - } - brelse(dind_bh); - return retry; -} - -void xiafs_truncate(struct inode * inode) -{ - int retry; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - while (1) { - retry = trunc_direct(inode); - retry |= trunc_indirect(inode, 8, &(inode->u.xiafs_i.i_ind_zone)); - retry |= trunc_dindirect(inode); - if (!retry) - break; - current->counter = 0; - schedule(); - } - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - inode->i_dirt = 1; -} diff --git a/fs/xiafs/xiafs_mac.h b/fs/xiafs/xiafs_mac.h deleted file mode 100644 index 05af6e42a..000000000 --- a/fs/xiafs/xiafs_mac.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * linux/fs/xiafs/xiafs_mac.h - * - * Copyright (C) Q. Frank Xia, 1993. - */ - -extern char internal_error_message[]; -#define INTERN_ERR internal_error_message, __FILE__, __LINE__ -#define WHERE_ERR __FILE__, __LINE__ - -#define XIAFS_ZSHIFT(sp) ((sp)->u.xiafs_sb.s_zone_shift) -#define XIAFS_ZSIZE(sp) (BLOCK_SIZE << XIAFS_ZSHIFT(sp)) -#define XIAFS_ZSIZE_BITS(sp) (BLOCK_SIZE_BITS + XIAFS_ZSHIFT(sp)) -#define XIAFS_ADDRS_PER_Z(sp) (BLOCK_SIZE >> (2 - XIAFS_ZSHIFT(sp))) -#define XIAFS_ADDRS_PER_Z_BITS(sp) (BLOCK_SIZE_BITS - 2 + XIAFS_ZSHIFT(sp)) -#define XIAFS_BITS_PER_Z(sp) (BLOCK_SIZE << (3 + XIAFS_ZSHIFT(sp))) -#define XIAFS_BITS_PER_Z_BITS(sp) (BLOCK_SIZE_BITS + 3 + XIAFS_ZSHIFT(sp)) -#define XIAFS_INODES_PER_Z(sp) (_XIAFS_INODES_PER_BLOCK << XIAFS_ZSHIFT(sp)) - -/* Use the most significant bytes of zone pointers to store block counter. */ -/* This is ugly, but it works. Note, We have another 7 bytes for "expansion". 
*/ - -#define XIAFS_GET_BLOCKS(row_ip, blocks) \ - blocks=((((row_ip)->i_zone[0] >> 24) & 0xff )|\ - (((row_ip)->i_zone[1] >> 16) & 0xff00 )|\ - (((row_ip)->i_zone[2] >> 8) & 0xff0000 ) ) - -/* XIAFS_PUT_BLOCKS should be called before saving zone pointers */ -#define XIAFS_PUT_BLOCKS(row_ip, blocks) \ - (row_ip)->i_zone[2]=((blocks)<< 8) & 0xff000000;\ - (row_ip)->i_zone[1]=((blocks)<<16) & 0xff000000;\ - (row_ip)->i_zone[0]=((blocks)<<24) & 0xff000000 |